From ac370a73710901152a2197b91a1bbe997aafd34c Mon Sep 17 00:00:00 2001
From: sayantn
Date: Sun, 7 Jul 2024 16:02:36 +0530
Subject: [PATCH 01/11] AVX512FP16 Part 0: Types
---
crates/core_arch/src/lib.rs | 3 +-
crates/core_arch/src/simd.rs | 73 +++++++++++++++++++++-
crates/core_arch/src/x86/mod.rs | 79 ++++++++++++++++++++++++
crates/core_arch/src/x86/test.rs | 33 ++++++++++
crates/stdarch-verify/src/lib.rs | 4 ++
crates/stdarch-verify/tests/x86-intel.rs | 19 ++++++
6 files changed, 208 insertions(+), 3 deletions(-)
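
Note (editor sketch, not part of the diff): the `__m512h` wrapper and the `m512hExt` helper added below are intended to be used by the later parts of this series the same way the existing `f32`/`f64` intrinsics use their `as_f32x16`/`as_f64x8` helpers. A minimal illustration, assuming the module's usual `simd_add` platform intrinsic and `transmute` imports:

    // Hypothetical lane-wise add, mirroring how existing f32/f64 intrinsics are written.
    // (Not part of this patch; `simd_add` and `transmute` come from the module prelude.)
    #[target_feature(enable = "avx512fp16")]
    unsafe fn add_ph_sketch(a: __m512h, b: __m512h) -> __m512h {
        // Reinterpret the opaque wrappers as the crate-internal `f16x32` SIMD type,
        // do the element-wise addition, then transmute back to the public type.
        transmute(simd_add(a.as_f16x32(), b.as_f16x32()))
    }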
diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs
index 1901149074..a7a02783e0 100644
--- a/crates/core_arch/src/lib.rs
+++ b/crates/core_arch/src/lib.rs
@@ -34,7 +34,8 @@
target_feature_11,
generic_arg_infer,
asm_experimental_arch,
- sha512_sm_x86
+ sha512_sm_x86,
+ f16
)]
#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
#![deny(clippy::missing_inline_in_public_items)]
diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs
index 4c637f49f3..3082334102 100644
--- a/crates/core_arch/src/simd.rs
+++ b/crates/core_arch/src/simd.rs
@@ -3,9 +3,10 @@
#![allow(non_camel_case_types)]
macro_rules! simd_ty {
- ($id:ident [$ety:ident]: $($elem_name:ident),*) => {
+ ($(#[$stability:meta])? $id:ident [$ety:ident]: $($elem_name:ident),*) => {
#[repr(simd)]
#[derive(Copy, Clone, Debug, PartialEq)]
+ $(#[$stability])?
pub(crate) struct $id { $(pub $elem_name: $ety),* }
#[allow(clippy::use_self)]
@@ -186,9 +187,20 @@ simd_ty!(
simd_ty!(i32x4[i32]: x0, x1, x2, x3);
simd_ty!(i64x2[i64]: x0, x1);
+simd_ty!(
+ #[unstable(feature = "f16", issue = "116909")]
+ f16x8[f16]:
+ x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7
+);
simd_ty!(f32x4[f32]: x0, x1, x2, x3);
simd_ty!(f64x2[f64]: x0, x1);
-simd_ty!(f64x4[f64]: x0, x1, x2, x3);
simd_m_ty!(
m8x16[i8]:
@@ -359,6 +371,26 @@ simd_ty!(
);
simd_ty!(i64x4[i64]: x0, x1, x2, x3);
+simd_ty!(
+ #[unstable(feature = "f16", issue = "116909")]
+ f16x16[f16]:
+ x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15
+);
simd_ty!(
f32x8[f32]:
x0,
@@ -370,6 +402,7 @@ simd_ty!(
x6,
x7
);
+simd_ty!(f64x4[f64]: x0, x1, x2, x3);
simd_m_ty!(
m8x32[i8]:
@@ -688,6 +721,42 @@ simd_ty!(
x15
);
+simd_ty!(
+ #[unstable(feature = "f16", issue = "116909")]
+ f16x32[f16]:
+ x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15,
+ x16,
+ x17,
+ x18,
+ x19,
+ x20,
+ x21,
+ x22,
+ x23,
+ x24,
+ x25,
+ x26,
+ x27,
+ x28,
+ x29,
+ x30,
+ x31
+);
simd_ty!(
f32x16[f32]:
x0,
diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs
index 9365fe10a2..d3d4381cc7 100644
--- a/crates/core_arch/src/x86/mod.rs
+++ b/crates/core_arch/src/x86/mod.rs
@@ -335,6 +335,41 @@ types! {
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16
);
+
+ /// 128-bit wide set of 8 `f16` types, x86-specific
+ ///
+ /// This type is the same as the `__m128h` type defined by Intel,
+ /// representing a 128-bit SIMD register which internally consists of
+ /// 8 packed `f16` instances. Its purpose is for `f16`-related intrinsic
+ /// implementations.
+ #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+ pub struct __m128h(f16, f16, f16, f16, f16, f16, f16, f16);
+
+ /// 256-bit wide set of 16 `f16` types, x86-specific
+ ///
+ /// This type is the same as the `__m256h` type defined by Intel,
+ /// representing a 256-bit SIMD register which internally consists of
+ /// 16 packed `f16` instances. Its purpose is for `f16`-related intrinsic
+ /// implementations.
+ #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+ pub struct __m256h(
+ f16, f16, f16, f16, f16, f16, f16, f16,
+ f16, f16, f16, f16, f16, f16, f16, f16
+ );
+
+ /// 512-bit wide set of 32 `f16` types, x86-specific
+ ///
+ /// This type is the same as the `__m512h` type defined by Intel,
+ /// representing a 512-bit SIMD register which internally consists of
+ /// 32 packed `f16` instances. Its purpose is for `f16`-related intrinsic
+ /// implementations.
+ #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+ pub struct __m512h(
+ f16, f16, f16, f16, f16, f16, f16, f16,
+ f16, f16, f16, f16, f16, f16, f16, f16,
+ f16, f16, f16, f16, f16, f16, f16, f16,
+ f16, f16, f16, f16, f16, f16, f16, f16
+ );
}
/// The BFloat16 type used in AVX-512 intrinsics.
@@ -761,6 +796,57 @@ impl m512bhExt for __m512bh {
}
}
+#[allow(non_camel_case_types)]
+pub(crate) trait m128hExt: Sized {
+ fn as_m128h(self) -> __m128h;
+
+ #[inline]
+ fn as_f16x8(self) -> crate::core_arch::simd::f16x8 {
+ unsafe { transmute(self.as_m128h()) }
+ }
+}
+
+impl m128hExt for __m128h {
+ #[inline]
+ fn as_m128h(self) -> Self {
+ self
+ }
+}
+
+#[allow(non_camel_case_types)]
+pub(crate) trait m256hExt: Sized {
+ fn as_m256h(self) -> __m256h;
+
+ #[inline]
+ fn as_f16x16(self) -> crate::core_arch::simd::f16x16 {
+ unsafe { transmute(self.as_m256h()) }
+ }
+}
+
+impl m256hExt for __m256h {
+ #[inline]
+ fn as_m256h(self) -> Self {
+ self
+ }
+}
+
+#[allow(non_camel_case_types)]
+pub(crate) trait m512hExt: Sized {
+ fn as_m512h(self) -> __m512h;
+
+ #[inline]
+ fn as_f16x32(self) -> crate::core_arch::simd::f16x32 {
+ unsafe { transmute(self.as_m512h()) }
+ }
+}
+
+impl m512hExt for __m512h {
+ #[inline]
+ fn as_m512h(self) -> Self {
+ self
+ }
+}
+
mod eflags;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use self::eflags::*;
diff --git a/crates/core_arch/src/x86/test.rs b/crates/core_arch/src/x86/test.rs
index 2c88650af3..ebb67356a4 100644
--- a/crates/core_arch/src/x86/test.rs
+++ b/crates/core_arch/src/x86/test.rs
@@ -36,6 +36,17 @@ pub unsafe fn get_m128(a: __m128, idx: usize) -> f32 {
transmute::<_, [f32; 4]>(a)[idx]
}
+#[track_caller]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn assert_eq_m128h(a: __m128h, b: __m128h) {
+ // FIXME: use `_mm_cmp_ph_mask::<_CMP_EQ_OQ>` when it's implemented
+ let r = _mm_cmpeq_epi16_mask(transmute(a), transmute(b));
+ if r != 0b1111_1111 {
+ panic!("{:?} != {:?}", a, b);
+ }
+}
+
// not actually an intrinsic but useful in various tests as we ported from
// `i64x2::new` which is backwards from `_mm_set_epi64x`
#[target_feature(enable = "sse2")]
@@ -77,6 +88,17 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 {
transmute::<_, [f32; 8]>(a)[idx]
}
+#[track_caller]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn assert_eq_m256h(a: __m256h, b: __m256h) {
+ // FIXME: use `_mm256_cmp_ph_mask::<_CMP_EQ_OQ>` when it's implemented
+ let r = _mm256_cmpeq_epi16_mask(transmute(a), transmute(b));
+ if r != 0b11111111_11111111 {
+ panic!("{:?} != {:?}", a, b);
+ }
+}
+
#[target_feature(enable = "avx512f")]
pub unsafe fn get_m512(a: __m512, idx: usize) -> f32 {
transmute::<_, [f32; 16]>(a)[idx]
@@ -139,3 +161,14 @@ pub unsafe fn assert_eq_m512d(a: __m512d, b: __m512d) {
panic!("{:?} != {:?}", a, b);
}
}
+
+#[track_caller]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn assert_eq_m512h(a: __m512h, b: __m512h) {
+ // FIXME: use `_mm512_cmp_ph_mask::<_CMP_EQ_OQ>` when it's implemented
+ let r = _mm512_cmpeq_epi16_mask(transmute(a), transmute(b));
+ if r != 0b11111111_11111111_11111111_11111111 {
+ panic!("{:?} != {:?}", a, b);
+ }
+}
diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs
index 106aeabdb0..efb5d50e26 100644
--- a/crates/stdarch-verify/src/lib.rs
+++ b/crates/stdarch-verify/src/lib.rs
@@ -182,14 +182,17 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"__m128" => quote! { &M128 },
"__m128bh" => quote! { &M128BH },
"__m128d" => quote! { &M128D },
+ "__m128h" => quote! { &M128H },
"__m128i" => quote! { &M128I },
"__m256" => quote! { &M256 },
"__m256bh" => quote! { &M256BH },
"__m256d" => quote! { &M256D },
+ "__m256h" => quote! { &M256H },
"__m256i" => quote! { &M256I },
"__m512" => quote! { &M512 },
"__m512bh" => quote! { &M512BH },
"__m512d" => quote! { &M512D },
+ "__m512h" => quote! { &M512H },
"__m512i" => quote! { &M512I },
"__mmask8" => quote! { &MMASK8 },
"__mmask16" => quote! { &MMASK16 },
@@ -201,6 +204,7 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"_MM_PERM_ENUM" => quote! { &MM_PERM_ENUM },
"bool" => quote! { &BOOL },
"bf16" => quote! { &BF16 },
+ "f16" => quote! { &F16 },
"f32" => quote! { &F32 },
"f64" => quote! { &F64 },
"i16" => quote! { &I16 },
diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs
index d035b4edff..fadaa6a4b1 100644
--- a/crates/stdarch-verify/tests/x86-intel.rs
+++ b/crates/stdarch-verify/tests/x86-intel.rs
@@ -24,6 +24,7 @@ struct Function {
}
static BF16: Type = Type::BFloat16;
+static F16: Type = Type::PrimFloat(16);
static F32: Type = Type::PrimFloat(32);
static F64: Type = Type::PrimFloat(64);
static I8: Type = Type::PrimSigned(8);
@@ -41,14 +42,17 @@ static M128: Type = Type::M128;
static M128BH: Type = Type::M128BH;
static M128I: Type = Type::M128I;
static M128D: Type = Type::M128D;
+static M128H: Type = Type::M128H;
static M256: Type = Type::M256;
static M256BH: Type = Type::M256BH;
static M256I: Type = Type::M256I;
static M256D: Type = Type::M256D;
+static M256H: Type = Type::M256H;
static M512: Type = Type::M512;
static M512BH: Type = Type::M512BH;
static M512I: Type = Type::M512I;
static M512D: Type = Type::M512D;
+static M512H: Type = Type::M512H;
static MMASK8: Type = Type::MMASK8;
static MMASK16: Type = Type::MMASK16;
static MMASK32: Type = Type::MMASK32;
@@ -73,14 +77,17 @@ enum Type {
M128,
M128BH,
M128D,
+ M128H,
M128I,
M256,
M256BH,
M256D,
+ M256H,
M256I,
M512,
M512BH,
M512D,
+ M512H,
M512I,
MMASK8,
MMASK16,
@@ -221,13 +228,16 @@ fn verify_all_signatures() {
"_mm_undefined_ps",
"_mm_undefined_pd",
"_mm_undefined_si128",
+ "_mm_undefined_ph",
"_mm256_undefined_ps",
"_mm256_undefined_pd",
"_mm256_undefined_si256",
+ "_mm256_undefined_ph",
"_mm512_undefined_ps",
"_mm512_undefined_pd",
"_mm512_undefined_epi32",
"_mm512_undefined",
+ "_mm512_undefined_ph",
// Has doc-tests instead
"_mm256_shuffle_epi32",
"_mm256_unpackhi_epi8",
@@ -483,6 +493,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
// The XML file names BF16 as "avx512_bf16", while Rust calls
// it "avx512bf16".
"avx512_bf16" => String::from("avx512bf16"),
+ // The XML file names FP16 as "avx512_fp16", while Rust calls
+ // it "avx512fp16".
+ "avx512_fp16" => String::from("avx512fp16"),
// The XML file names AVX-VNNI as "avx_vnni", while Rust calls
// it "avxvnni"
"avx_vnni" => String::from("avxvnni"),
@@ -709,6 +722,7 @@ fn equate(
}
}
match (t, &intel[..]) {
+ (&Type::PrimFloat(16), "_Float16") => {}
(&Type::PrimFloat(32), "float") => {}
(&Type::PrimFloat(64), "double") => {}
(&Type::PrimSigned(8), "__int8" | "char") => {}
@@ -728,14 +742,17 @@ fn equate(
(&Type::M128BH, "__m128bh") => {}
(&Type::M128I, "__m128i") => {}
(&Type::M128D, "__m128d") => {}
+ (&Type::M128H, "__m128h") => {}
(&Type::M256, "__m256") => {}
(&Type::M256BH, "__m256bh") => {}
(&Type::M256I, "__m256i") => {}
(&Type::M256D, "__m256d") => {}
+ (&Type::M256H, "__m256h") => {}
(&Type::M512, "__m512") => {}
(&Type::M512BH, "__m512bh") => {}
(&Type::M512I, "__m512i") => {}
(&Type::M512D, "__m512d") => {}
+ (&Type::M512H, "__m512h") => {}
(&Type::MMASK64, "__mmask64") => {}
(&Type::MMASK32, "__mmask32") => {}
(&Type::MMASK16, "__mmask16") => {}
@@ -771,6 +788,7 @@ fn equate(
(&Type::MutPtr(&Type::M512D), "__m512d*") => {}
(&Type::ConstPtr(_), "void const*") => {}
+ (&Type::ConstPtr(&Type::PrimFloat(16)), "_Float16 const*") => {}
(&Type::ConstPtr(&Type::PrimFloat(32)), "float const*") => {}
(&Type::ConstPtr(&Type::PrimFloat(64)), "double const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(8)), "char const*") => {}
@@ -785,6 +803,7 @@ fn equate(
(&Type::ConstPtr(&Type::M128BH), "__m128bh const*") => {}
(&Type::ConstPtr(&Type::M128I), "__m128i const*") => {}
(&Type::ConstPtr(&Type::M128D), "__m128d const*") => {}
+ (&Type::ConstPtr(&Type::M128H), "__m128h const*") => {}
(&Type::ConstPtr(&Type::M256), "__m256 const*") => {}
(&Type::ConstPtr(&Type::M256BH), "__m256bh const*") => {}
(&Type::ConstPtr(&Type::M256I), "__m256i const*") => {}
From 1b093be687c125850a250ecc3f680843ff908be5 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Sun, 7 Jul 2024 16:03:17 +0530
Subject: [PATCH 02/11] AVX512FP16 Part 1
Add-Sub-Mul-Div, Load-Store-Move, `comi`, `set`
---
crates/core_arch/missing-x86.md | 149 -
crates/core_arch/src/x86/avx512fp16.rs | 4004 ++++++++++++++++++++++++
crates/core_arch/src/x86/mod.rs | 4 +
3 files changed, 4008 insertions(+), 149 deletions(-)
create mode 100644 crates/core_arch/src/x86/avx512fp16.rs
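
Note (editor sketch, not part of the diff): a minimal usage example of the intrinsics implemented in this part, assuming their signatures follow the Intel intrinsics guide (e.g. `_mm512_set1_ph(f16) -> __m512h` and `_mm512_storeu_ph(*mut f16, __m512h)`):

    // Splat, add and store 32 half-precision lanes.
    #[target_feature(enable = "avx512fp16")]
    unsafe fn add_demo() -> [f16; 32] {
        let a = _mm512_set1_ph(1.0);             // broadcast 1.0 to all 32 lanes
        let b = _mm512_set1_ph(2.0);
        let sum = _mm512_add_ph(a, b);           // lane-wise a + b
        let mut out = [0.0_f16; 32];
        _mm512_storeu_ph(out.as_mut_ptr(), sum); // unaligned store of all lanes
        out
    }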
diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index 0916befe04..7bc2456ddd 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -53,33 +53,9 @@
["AVX512_FP16"]
- * [ ] [`_mm256_castpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph)
- * [ ] [`_mm256_castph128_ph256`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256)
- * [ ] [`_mm256_castph256_ph128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128)
- * [ ] [`_mm256_castph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd)
- * [ ] [`_mm256_castph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps)
- * [ ] [`_mm256_castph_si256`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256)
- * [ ] [`_mm256_castps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph)
- * [ ] [`_mm256_castsi256_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph)
* [ ] [`_mm256_cvtsh_h`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h)
* [ ] [`_mm256_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pch)
- * [ ] [`_mm256_set1_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ph)
- * [ ] [`_mm256_set_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ph)
- * [ ] [`_mm256_setr_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ph)
- * [ ] [`_mm256_zextph128_ph256`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256)
* [ ] [`_mm512_abs_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph)
- * [ ] [`_mm512_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ph)
- * [ ] [`_mm512_add_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ph)
- * [ ] [`_mm512_castpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph)
- * [ ] [`_mm512_castph128_ph512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512)
- * [ ] [`_mm512_castph256_ph512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512)
- * [ ] [`_mm512_castph512_ph128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128)
- * [ ] [`_mm512_castph512_ph256`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256)
- * [ ] [`_mm512_castph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd)
- * [ ] [`_mm512_castph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps)
- * [ ] [`_mm512_castph_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512)
- * [ ] [`_mm512_castps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph)
- * [ ] [`_mm512_castsi512_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph)
* [ ] [`_mm512_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
* [ ] [`_mm512_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
* [ ] [`_mm512_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch)
@@ -130,8 +106,6 @@
* [ ] [`_mm512_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph)
* [ ] [`_mm512_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
* [ ] [`_mm512_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
- * [ ] [`_mm512_div_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ph)
- * [ ] [`_mm512_div_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ph)
* [ ] [`_mm512_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch)
* [ ] [`_mm512_fcmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch)
* [ ] [`_mm512_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch)
@@ -157,8 +131,6 @@
* [ ] [`_mm512_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph)
* [ ] [`_mm512_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph)
* [ ] [`_mm512_getmant_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph)
- * [ ] [`_mm512_load_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ph)
- * [ ] [`_mm512_loadu_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ph)
* [ ] [`_mm512_mask3_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch)
* [ ] [`_mm512_mask3_fcmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch)
* [ ] [`_mm512_mask3_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch)
@@ -175,8 +147,6 @@
* [ ] [`_mm512_mask3_fnmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph)
* [ ] [`_mm512_mask3_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph)
* [ ] [`_mm512_mask3_fnmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph)
- * [ ] [`_mm512_mask_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ph)
- * [ ] [`_mm512_mask_add_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ph)
* [ ] [`_mm512_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
* [ ] [`_mm512_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
* [ ] [`_mm512_mask_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
@@ -227,8 +197,6 @@
* [ ] [`_mm512_mask_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph)
* [ ] [`_mm512_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
* [ ] [`_mm512_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
- * [ ] [`_mm512_mask_div_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ph)
- * [ ] [`_mm512_mask_div_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ph)
* [ ] [`_mm512_mask_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch)
* [ ] [`_mm512_mask_fcmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch)
* [ ] [`_mm512_mask_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch)
@@ -259,9 +227,7 @@
* [ ] [`_mm512_mask_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph)
* [ ] [`_mm512_mask_min_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph)
* [ ] [`_mm512_mask_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch)
- * [ ] [`_mm512_mask_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ph)
* [ ] [`_mm512_mask_mul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch)
- * [ ] [`_mm512_mask_mul_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ph)
* [ ] [`_mm512_mask_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph)
* [ ] [`_mm512_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
* [ ] [`_mm512_mask_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
@@ -272,10 +238,6 @@
* [ ] [`_mm512_mask_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
* [ ] [`_mm512_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph)
* [ ] [`_mm512_mask_sqrt_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph)
- * [ ] [`_mm512_mask_sub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ph)
- * [ ] [`_mm512_mask_sub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ph)
- * [ ] [`_mm512_maskz_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ph)
- * [ ] [`_mm512_maskz_add_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ph)
* [ ] [`_mm512_maskz_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch)
* [ ] [`_mm512_maskz_cmul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch)
* [ ] [`_mm512_maskz_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch)
@@ -323,8 +285,6 @@
* [ ] [`_mm512_maskz_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
* [ ] [`_mm512_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
* [ ] [`_mm512_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
- * [ ] [`_mm512_maskz_div_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ph)
- * [ ] [`_mm512_maskz_div_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ph)
* [ ] [`_mm512_maskz_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch)
* [ ] [`_mm512_maskz_fcmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch)
* [ ] [`_mm512_maskz_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch)
@@ -354,9 +314,7 @@
* [ ] [`_mm512_maskz_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph)
* [ ] [`_mm512_maskz_min_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph)
* [ ] [`_mm512_maskz_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch)
- * [ ] [`_mm512_maskz_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ph)
* [ ] [`_mm512_maskz_mul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch)
- * [ ] [`_mm512_maskz_mul_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ph)
* [ ] [`_mm512_maskz_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph)
* [ ] [`_mm512_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
* [ ] [`_mm512_maskz_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
@@ -367,16 +325,12 @@
* [ ] [`_mm512_maskz_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph)
* [ ] [`_mm512_maskz_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph)
* [ ] [`_mm512_maskz_sqrt_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph)
- * [ ] [`_mm512_maskz_sub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ph)
- * [ ] [`_mm512_maskz_sub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ph)
* [ ] [`_mm512_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph)
* [ ] [`_mm512_max_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph)
* [ ] [`_mm512_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph)
* [ ] [`_mm512_min_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph)
* [ ] [`_mm512_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch)
- * [ ] [`_mm512_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ph)
* [ ] [`_mm512_mul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch)
- * [ ] [`_mm512_mul_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ph)
* [ ] [`_mm512_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
* [ ] [`_mm512_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
* [ ] [`_mm512_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph)
@@ -392,39 +346,12 @@
* [ ] [`_mm512_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph)
* [ ] [`_mm512_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph)
* [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
- * [ ] [`_mm512_set1_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ph)
- * [ ] [`_mm512_set_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ph)
- * [ ] [`_mm512_setr_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ph)
- * [ ] [`_mm512_setzero_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph)
* [ ] [`_mm512_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph)
* [ ] [`_mm512_sqrt_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph)
- * [ ] [`_mm512_store_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ph)
- * [ ] [`_mm512_storeu_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ph)
- * [ ] [`_mm512_sub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ph)
- * [ ] [`_mm512_sub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ph)
- * [ ] [`_mm512_undefined_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph)
- * [ ] [`_mm512_zextph128_ph512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512)
- * [ ] [`_mm512_zextph256_ph512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512)
- * [ ] [`_mm_add_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh)
- * [ ] [`_mm_add_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sh)
- * [ ] [`_mm_castpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph)
- * [ ] [`_mm_castph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd)
- * [ ] [`_mm_castph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps)
- * [ ] [`_mm_castph_si128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128)
- * [ ] [`_mm_castps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph)
- * [ ] [`_mm_castsi128_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph)
* [ ] [`_mm_cmp_round_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask)
* [ ] [`_mm_cmp_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask)
* [ ] [`_mm_cmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch)
* [ ] [`_mm_cmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch)
- * [ ] [`_mm_comi_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh)
- * [ ] [`_mm_comi_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh)
- * [ ] [`_mm_comieq_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
- * [ ] [`_mm_comige_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh)
- * [ ] [`_mm_comigt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh)
- * [ ] [`_mm_comile_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh)
- * [ ] [`_mm_comilt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh)
- * [ ] [`_mm_comineq_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh)
* [ ] [`_mm_cvt_roundi32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
* [ ] [`_mm_cvt_roundi64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
* [ ] [`_mm_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
@@ -460,8 +387,6 @@
* [ ] [`_mm_cvttsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u64)
* [ ] [`_mm_cvtu32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
* [ ] [`_mm_cvtu64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh)
- * [ ] [`_mm_div_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sh)
- * [ ] [`_mm_div_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sh)
* [ ] [`_mm_fcmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch)
* [ ] [`_mm_fcmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch)
* [ ] [`_mm_fcmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch)
@@ -483,7 +408,6 @@
* [ ] [`_mm_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh)
* [ ] [`_mm_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh)
* [ ] [`_mm_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh)
- * [ ] [`_mm_load_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sh)
* [ ] [`_mm_mask3_fcmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch)
* [ ] [`_mm_mask3_fcmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch)
* [ ] [`_mm_mask3_fmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch)
@@ -496,8 +420,6 @@
* [ ] [`_mm_mask3_fnmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh)
* [ ] [`_mm_mask3_fnmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh)
* [ ] [`_mm_mask3_fnmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh)
- * [ ] [`_mm_mask_add_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sh)
- * [ ] [`_mm_mask_add_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sh)
* [ ] [`_mm_mask_cmp_round_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask)
* [ ] [`_mm_mask_cmp_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask)
* [ ] [`_mm_mask_cmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch)
@@ -510,8 +432,6 @@
* [ ] [`_mm_mask_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
* [ ] [`_mm_mask_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
* [ ] [`_mm_mask_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
- * [ ] [`_mm_mask_div_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sh)
- * [ ] [`_mm_mask_div_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sh)
* [ ] [`_mm_mask_fcmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch)
* [ ] [`_mm_mask_fcmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch)
* [ ] [`_mm_mask_fcmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch)
@@ -533,12 +453,8 @@
* [ ] [`_mm_mask_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
* [ ] [`_mm_mask_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
* [ ] [`_mm_mask_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
- * [ ] [`_mm_mask_load_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sh)
- * [ ] [`_mm_mask_move_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh)
* [ ] [`_mm_mask_mul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch)
- * [ ] [`_mm_mask_mul_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sh)
* [ ] [`_mm_mask_mul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch)
- * [ ] [`_mm_mask_mul_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sh)
* [ ] [`_mm_mask_rcp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh)
* [ ] [`_mm_mask_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
* [ ] [`_mm_mask_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
@@ -549,11 +465,6 @@
* [ ] [`_mm_mask_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
* [ ] [`_mm_mask_sqrt_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh)
* [ ] [`_mm_mask_sqrt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh)
- * [ ] [`_mm_mask_store_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sh)
- * [ ] [`_mm_mask_sub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sh)
- * [ ] [`_mm_mask_sub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sh)
- * [ ] [`_mm_maskz_add_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sh)
- * [ ] [`_mm_maskz_add_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sh)
* [ ] [`_mm_maskz_cmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch)
* [ ] [`_mm_maskz_cmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch)
* [ ] [`_mm_maskz_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
@@ -564,8 +475,6 @@
* [ ] [`_mm_maskz_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
* [ ] [`_mm_maskz_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
* [ ] [`_mm_maskz_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
- * [ ] [`_mm_maskz_div_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sh)
- * [ ] [`_mm_maskz_div_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sh)
* [ ] [`_mm_maskz_fcmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch)
* [ ] [`_mm_maskz_fcmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch)
* [ ] [`_mm_maskz_fcmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch)
@@ -586,12 +495,8 @@
* [ ] [`_mm_maskz_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
* [ ] [`_mm_maskz_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
* [ ] [`_mm_maskz_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
- * [ ] [`_mm_maskz_load_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sh)
- * [ ] [`_mm_maskz_move_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh)
* [ ] [`_mm_maskz_mul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch)
- * [ ] [`_mm_maskz_mul_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sh)
* [ ] [`_mm_maskz_mul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch)
- * [ ] [`_mm_maskz_mul_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sh)
* [ ] [`_mm_maskz_rcp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh)
* [ ] [`_mm_maskz_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
* [ ] [`_mm_maskz_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
@@ -602,13 +507,8 @@
* [ ] [`_mm_maskz_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
* [ ] [`_mm_maskz_sqrt_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh)
* [ ] [`_mm_maskz_sqrt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh)
- * [ ] [`_mm_maskz_sub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sh)
- * [ ] [`_mm_maskz_sub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sh)
- * [ ] [`_mm_move_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh)
* [ ] [`_mm_mul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch)
- * [ ] [`_mm_mul_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sh)
* [ ] [`_mm_mul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch)
- * [ ] [`_mm_mul_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sh)
* [ ] [`_mm_rcp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh)
* [ ] [`_mm_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
* [ ] [`_mm_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
@@ -618,28 +518,14 @@
* [ ] [`_mm_scalef_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
* [ ] [`_mm_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
* [ ] [`_mm_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pch)
- * [ ] [`_mm_set1_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ph)
- * [ ] [`_mm_set_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ph)
- * [ ] [`_mm_set_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sh)
- * [ ] [`_mm_setr_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ph)
* [ ] [`_mm_sqrt_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh)
* [ ] [`_mm_sqrt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh)
- * [ ] [`_mm_store_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh)
- * [ ] [`_mm_sub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh)
- * [ ] [`_mm_sub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh)
- * [ ] [`_mm_ucomieq_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh)
- * [ ] [`_mm_ucomige_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh)
- * [ ] [`_mm_ucomigt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh)
- * [ ] [`_mm_ucomile_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh)
- * [ ] [`_mm_ucomilt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh)
- * [ ] [`_mm_ucomineq_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh)
["AVX512_FP16", "AVX512VL"]
* [ ] [`_mm256_abs_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph)
- * [ ] [`_mm256_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ph)
* [ ] [`_mm256_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
* [ ] [`_mm256_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch)
* [ ] [`_mm256_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch)
@@ -665,7 +551,6 @@
* [ ] [`_mm256_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64)
* [ ] [`_mm256_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
* [ ] [`_mm256_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
- * [ ] [`_mm256_div_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ph)
* [ ] [`_mm256_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch)
* [ ] [`_mm256_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch)
* [ ] [`_mm256_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch)
@@ -679,8 +564,6 @@
* [ ] [`_mm256_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
* [ ] [`_mm256_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph)
* [ ] [`_mm256_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph)
- * [ ] [`_mm256_load_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ph)
- * [ ] [`_mm256_loadu_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ph)
* [ ] [`_mm256_mask3_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch)
* [ ] [`_mm256_mask3_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch)
* [ ] [`_mm256_mask3_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph)
@@ -689,7 +572,6 @@
* [ ] [`_mm256_mask3_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph)
* [ ] [`_mm256_mask3_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph)
* [ ] [`_mm256_mask3_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph)
- * [ ] [`_mm256_mask_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ph)
* [ ] [`_mm256_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
* [ ] [`_mm256_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
* [ ] [`_mm256_mask_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch)
@@ -716,7 +598,6 @@
* [ ] [`_mm256_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64)
* [ ] [`_mm256_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
* [ ] [`_mm256_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
- * [ ] [`_mm256_mask_div_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ph)
* [ ] [`_mm256_mask_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch)
* [ ] [`_mm256_mask_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch)
* [ ] [`_mm256_mask_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch)
@@ -733,15 +614,12 @@
* [ ] [`_mm256_mask_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph)
* [ ] [`_mm256_mask_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph)
* [ ] [`_mm256_mask_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch)
- * [ ] [`_mm256_mask_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ph)
* [ ] [`_mm256_mask_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph)
* [ ] [`_mm256_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
* [ ] [`_mm256_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
* [ ] [`_mm256_mask_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph)
* [ ] [`_mm256_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
* [ ] [`_mm256_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph)
- * [ ] [`_mm256_mask_sub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ph)
- * [ ] [`_mm256_maskz_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ph)
* [ ] [`_mm256_maskz_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch)
* [ ] [`_mm256_maskz_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch)
* [ ] [`_mm256_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
@@ -766,7 +644,6 @@
* [ ] [`_mm256_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
* [ ] [`_mm256_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
* [ ] [`_mm256_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
- * [ ] [`_mm256_maskz_div_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ph)
* [ ] [`_mm256_maskz_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch)
* [ ] [`_mm256_maskz_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch)
* [ ] [`_mm256_maskz_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch)
@@ -782,18 +659,15 @@
* [ ] [`_mm256_maskz_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph)
* [ ] [`_mm256_maskz_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph)
* [ ] [`_mm256_maskz_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch)
- * [ ] [`_mm256_maskz_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ph)
* [ ] [`_mm256_maskz_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph)
* [ ] [`_mm256_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
* [ ] [`_mm256_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
* [ ] [`_mm256_maskz_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph)
* [ ] [`_mm256_maskz_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph)
* [ ] [`_mm256_maskz_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph)
- * [ ] [`_mm256_maskz_sub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ph)
* [ ] [`_mm256_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph)
* [ ] [`_mm256_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph)
* [ ] [`_mm256_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch)
- * [ ] [`_mm256_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ph)
* [ ] [`_mm256_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
* [ ] [`_mm256_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
* [ ] [`_mm256_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph)
@@ -805,14 +679,8 @@
* [ ] [`_mm256_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph)
* [ ] [`_mm256_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph)
* [ ] [`_mm256_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
- * [ ] [`_mm256_setzero_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph)
* [ ] [`_mm256_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph)
- * [ ] [`_mm256_store_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ph)
- * [ ] [`_mm256_storeu_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ph)
- * [ ] [`_mm256_sub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ph)
- * [ ] [`_mm256_undefined_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph)
* [ ] [`_mm_abs_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph)
- * [ ] [`_mm_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ph)
* [ ] [`_mm_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
* [ ] [`_mm_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch)
* [ ] [`_mm_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch)
@@ -838,7 +706,6 @@
* [ ] [`_mm_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64)
* [ ] [`_mm_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
* [ ] [`_mm_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
- * [ ] [`_mm_div_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ph)
* [ ] [`_mm_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch)
* [ ] [`_mm_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch)
* [ ] [`_mm_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch)
@@ -852,8 +719,6 @@
* [ ] [`_mm_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
* [ ] [`_mm_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph)
* [ ] [`_mm_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph)
- * [ ] [`_mm_load_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ph)
- * [ ] [`_mm_loadu_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ph)
* [ ] [`_mm_mask3_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch)
* [ ] [`_mm_mask3_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch)
* [ ] [`_mm_mask3_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph)
@@ -862,7 +727,6 @@
* [ ] [`_mm_mask3_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph)
* [ ] [`_mm_mask3_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph)
* [ ] [`_mm_mask3_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph)
- * [ ] [`_mm_mask_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ph)
* [ ] [`_mm_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
* [ ] [`_mm_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
* [ ] [`_mm_mask_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch)
@@ -889,7 +753,6 @@
* [ ] [`_mm_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64)
* [ ] [`_mm_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
* [ ] [`_mm_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
- * [ ] [`_mm_mask_div_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ph)
* [ ] [`_mm_mask_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch)
* [ ] [`_mm_mask_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch)
* [ ] [`_mm_mask_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch)
@@ -910,15 +773,12 @@
* [ ] [`_mm_mask_min_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh)
* [ ] [`_mm_mask_min_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh)
* [ ] [`_mm_mask_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch)
- * [ ] [`_mm_mask_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ph)
* [ ] [`_mm_mask_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph)
* [ ] [`_mm_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
* [ ] [`_mm_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
* [ ] [`_mm_mask_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph)
* [ ] [`_mm_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
* [ ] [`_mm_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph)
- * [ ] [`_mm_mask_sub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ph)
- * [ ] [`_mm_maskz_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ph)
* [ ] [`_mm_maskz_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch)
* [ ] [`_mm_maskz_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch)
* [ ] [`_mm_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
@@ -943,7 +803,6 @@
* [ ] [`_mm_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
* [ ] [`_mm_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
* [ ] [`_mm_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
- * [ ] [`_mm_maskz_div_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ph)
* [ ] [`_mm_maskz_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch)
* [ ] [`_mm_maskz_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch)
* [ ] [`_mm_maskz_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch)
@@ -963,14 +822,12 @@
* [ ] [`_mm_maskz_min_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh)
* [ ] [`_mm_maskz_min_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh)
* [ ] [`_mm_maskz_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch)
- * [ ] [`_mm_maskz_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ph)
* [ ] [`_mm_maskz_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph)
* [ ] [`_mm_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
* [ ] [`_mm_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
* [ ] [`_mm_maskz_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph)
* [ ] [`_mm_maskz_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph)
* [ ] [`_mm_maskz_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph)
- * [ ] [`_mm_maskz_sub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ph)
* [ ] [`_mm_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph)
* [ ] [`_mm_max_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh)
* [ ] [`_mm_max_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh)
@@ -978,7 +835,6 @@
* [ ] [`_mm_min_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh)
* [ ] [`_mm_min_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh)
* [ ] [`_mm_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch)
- * [ ] [`_mm_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ph)
* [ ] [`_mm_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
* [ ] [`_mm_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
* [ ] [`_mm_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph)
@@ -990,12 +846,7 @@
* [ ] [`_mm_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph)
* [ ] [`_mm_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph)
* [ ] [`_mm_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph)
- * [ ] [`_mm_setzero_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph)
* [ ] [`_mm_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph)
- * [ ] [`_mm_store_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ph)
- * [ ] [`_mm_storeu_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ph)
- * [ ] [`_mm_sub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ph)
- * [ ] [`_mm_undefined_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph)
diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs
new file mode 100644
index 0000000000..c6eeff1904
--- /dev/null
+++ b/crates/core_arch/src/x86/avx512fp16.rs
@@ -0,0 +1,4004 @@
+use crate::arch::asm;
+use crate::core_arch::{simd::*, x86::*};
+use crate::intrinsics::simd::*;
+use crate::ptr;
+
+/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_set_ph(
+ e7: f16,
+ e6: f16,
+ e5: f16,
+ e4: f16,
+ e3: f16,
+ e2: f16,
+ e1: f16,
+ e0: f16,
+) -> __m128h {
+ __m128h(e0, e1, e2, e3, e4, e5, e6, e7)
+}
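+
+// Illustrative note (hypothetical values): arguments are supplied from the highest
+// element index down to the lowest, so element 0 of the result is `e0`:
+//
+//     let v = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
+//     // element 0 of `v` is 0.0 and element 7 is 7.0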
+
+/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_set_ph(
+ e15: f16,
+ e14: f16,
+ e13: f16,
+ e12: f16,
+ e11: f16,
+ e10: f16,
+ e9: f16,
+ e8: f16,
+ e7: f16,
+ e6: f16,
+ e5: f16,
+ e4: f16,
+ e3: f16,
+ e2: f16,
+ e1: f16,
+ e0: f16,
+) -> __m256h {
+ __m256h(
+ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
+ )
+}
+
+/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_set_ph(
+ e31: f16,
+ e30: f16,
+ e29: f16,
+ e28: f16,
+ e27: f16,
+ e26: f16,
+ e25: f16,
+ e24: f16,
+ e23: f16,
+ e22: f16,
+ e21: f16,
+ e20: f16,
+ e19: f16,
+ e18: f16,
+ e17: f16,
+ e16: f16,
+ e15: f16,
+ e14: f16,
+ e13: f16,
+ e12: f16,
+ e11: f16,
+ e10: f16,
+ e9: f16,
+ e8: f16,
+ e7: f16,
+ e6: f16,
+ e5: f16,
+ e4: f16,
+ e3: f16,
+ e2: f16,
+ e1: f16,
+ e0: f16,
+) -> __m512h {
+ __m512h(
+ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
+ e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
+ )
+}
+
+/// Copy the half-precision (16-bit) floating-point element from a to the lower element of dst, and
+/// zero the upper 7 elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_set_sh(a: f16) -> __m128h {
+ __m128h(a, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
+}
+
+/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_set1_ph(a: f16) -> __m128h {
+ transmute(f16x8::splat(a))
+}
+
+/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_set1_ph(a: f16) -> __m256h {
+ transmute(f16x16::splat(a))
+}
+
+/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_set1_ph(a: f16) -> __m512h {
+ transmute(f16x32::splat(a))
+}
+
+/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_setr_ph(
+ e0: f16,
+ e1: f16,
+ e2: f16,
+ e3: f16,
+ e4: f16,
+ e5: f16,
+ e6: f16,
+ e7: f16,
+) -> __m128h {
+ __m128h(e0, e1, e2, e3, e4, e5, e6, e7)
+}
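+
+// Illustrative note: `_mm_setr_ph` takes its arguments in element (memory) order, so
+// these two hypothetical calls build the same vector:
+//
+//     let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
+//     let b = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);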
+
+/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_setr_ph(
+ e0: f16,
+ e1: f16,
+ e2: f16,
+ e3: f16,
+ e4: f16,
+ e5: f16,
+ e6: f16,
+ e7: f16,
+ e8: f16,
+ e9: f16,
+ e10: f16,
+ e11: f16,
+ e12: f16,
+ e13: f16,
+ e14: f16,
+ e15: f16,
+) -> __m256h {
+ __m256h(
+ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
+ )
+}
+
+/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_setr_ph(
+ e0: f16,
+ e1: f16,
+ e2: f16,
+ e3: f16,
+ e4: f16,
+ e5: f16,
+ e6: f16,
+ e7: f16,
+ e8: f16,
+ e9: f16,
+ e10: f16,
+ e11: f16,
+ e12: f16,
+ e13: f16,
+ e14: f16,
+ e15: f16,
+ e16: f16,
+ e17: f16,
+ e18: f16,
+ e19: f16,
+ e20: f16,
+ e21: f16,
+ e22: f16,
+ e23: f16,
+ e24: f16,
+ e25: f16,
+ e26: f16,
+ e27: f16,
+ e28: f16,
+ e29: f16,
+ e30: f16,
+ e31: f16,
+) -> __m512h {
+ __m512h(
+ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
+ e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
+ )
+}
+
+/// Return vector of type __m128h with all elements set to zero.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_setzero_ph() -> __m128h {
+ transmute(f16x8::splat(0.0))
+}
+
+/// Return vector of type __m256h with all elements set to zero.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_setzero_ph() -> __m256h {
+ transmute(f16x16::splat(0.0))
+}
+
+/// Return vector of type __m512h with all elements set to zero.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_setzero_ph() -> __m512h {
+ transmute(f16x32::splat(0.0))
+}
+
+/// Return vector of type `__m128h` with undefined elements. In practice, this returns the all-zero
+/// vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_undefined_ph() -> __m128h {
+ transmute(f16x8::splat(0.0))
+}
+
+/// Return vector of type `__m256h` with undefined elements. In practice, this returns the all-zero
+/// vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_undefined_ph() -> __m256h {
+ transmute(f16x16::splat(0.0))
+}
+
+/// Return vector of type `__m512h` with undefined elements. In practice, this returns the all-zero
+/// vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_undefined_ph() -> __m512h {
+ transmute(f16x32::splat(0.0))
+}
+
+/// Cast vector of type `__m128d` to type `__m128h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_castpd_ph(a: __m128d) -> __m128h {
+ transmute(a)
+}
+
+/// Cast vector of type `__m256d` to type `__m256h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_castpd_ph(a: __m256d) -> __m256h {
+ transmute(a)
+}
+
+/// Cast vector of type `__m512d` to type `__m512h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_castpd_ph(a: __m512d) -> __m512h {
+ transmute(a)
+}
+
+/// Cast vector of type `__m128h` to type `__m128d`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_castph_pd(a: __m128h) -> __m128d {
+ transmute(a)
+}
+
+/// Cast vector of type `__m256h` to type `__m256d`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_castph_pd(a: __m256h) -> __m256d {
+ transmute(a)
+}
+
+/// Cast vector of type `__m512h` to type `__m512d`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_castph_pd(a: __m512h) -> __m512d {
+ transmute(a)
+}
+
+/// Cast vector of type `__m128` to type `__m128h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_castps_ph(a: __m128) -> __m128h {
+ transmute(a)
+}
+
+/// Cast vector of type `__m256` to type `__m256h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_castps_ph(a: __m256) -> __m256h {
+ transmute(a)
+}
+
+/// Cast vector of type `__m512` to type `__m512h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_castps_ph(a: __m512) -> __m512h {
+ transmute(a)
+}
+
+/// Cast vector of type `__m128h` to type `__m128`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_castph_ps(a: __m128h) -> __m128 {
+ transmute(a)
+}
+
+/// Cast vector of type `__m256h` to type `__m256`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_castph_ps(a: __m256h) -> __m256 {
+ transmute(a)
+}
+
+/// Cast vector of type `__m512h` to type `__m512`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_castph_ps(a: __m512h) -> __m512 {
+ transmute(a)
+}
+
+/// Cast vector of type `__m128i` to type `__m128h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_castsi128_ph(a: __m128i) -> __m128h {
+ transmute(a)
+}
+
+/// Cast vector of type `__m256i` to type `__m256h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_castsi256_ph(a: __m256i) -> __m256h {
+ transmute(a)
+}
+
+/// Cast vector of type `__m512i` to type `__m512h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_castsi512_ph(a: __m512i) -> __m512h {
+ transmute(a)
+}
+
+/// Cast vector of type `__m128h` to type `__m128i`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_castph_si128(a: __m128h) -> __m128i {
+ transmute(a)
+}
+
+/// Cast vector of type `__m256h` to type `__m256i`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_castph_si256(a: __m256h) -> __m256i {
+ transmute(a)
+}
+
+/// Cast vector of type `__m512h` to type `__m512i`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_castph_si512(a: __m512h) -> __m512i {
+ transmute(a)
+}
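+
+// Illustrative sketch: these casts are pure bit reinterpretations, so viewing a splat
+// of 1.0 through `_mm_castph_si128` exposes the IEEE half-precision encoding of 1.0
+// (0x3C00) in every 16-bit lane:
+//
+//     let bits = _mm_castph_si128(_mm_set1_ph(1.0));
+//     // each 16-bit lane of `bits` holds 0x3C00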
+
+/// Cast vector of type `__m256h` to type `__m128h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_castph256_ph128(a: __m256h) -> __m128h {
+ simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Cast vector of type `__m512h` to type `__m128h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_castph512_ph128(a: __m512h) -> __m128h {
+ simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Cast vector of type `__m512h` to type `__m256h`. This intrinsic is only used for compilation and
+/// does not generate any instructions, thus it has zero latency.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_castph512_ph256(a: __m512h) -> __m256h {
+ simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are undefined.
+/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
+/// but most of the time it does not generate any instructions.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_castph128_ph256(a: __m128h) -> __m256h {
+ simd_shuffle!(
+ a,
+ _mm_undefined_ph(),
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
+ )
+}
+
+/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are undefined.
+/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
+/// but most of the time it does not generate any instructions.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_castph128_ph512(a: __m128h) -> __m512h {
+ simd_shuffle!(
+ a,
+ _mm_undefined_ph(),
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8
+ ]
+ )
+}
+
+/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are undefined.
+/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
+/// but most of the time it does not generate any instructions.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_castph256_ph512(a: __m256h) -> __m512h {
+ simd_shuffle!(
+ a,
+ _mm256_undefined_ph(),
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16
+ ]
+ )
+}
+
+/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are zeroed.
+/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
+/// any instructions.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_zextph128_ph256(a: __m128h) -> __m256h {
+ simd_shuffle!(
+ a,
+ _mm_setzero_ph(),
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
+ )
+}
+
+/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are zeroed.
+/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
+/// any instructions.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
+ simd_shuffle!(
+ a,
+ _mm_setzero_ph(),
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8
+ ]
+ )
+}
+
+/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed.
+/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
+/// any instructions.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
+ simd_shuffle!(
+ a,
+ _mm256_setzero_ph(),
+ [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16
+ ]
+ )
+}
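+
+// Illustrative note: the `castph128_ph256`-style casts leave the new upper elements
+// undefined, while the `zext` variants above guarantee zeros. Prefer `zext` when the
+// upper lanes will be observed later:
+//
+//     let lo = _mm_set1_ph(1.0);
+//     let wide = _mm256_zextph128_ph256(lo); // elements 8..=15 are 0.0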
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and return the boolean result (0 or 1).
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_comi_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
+ static_assert_sae!(SAE);
+ vcomish(a, b, IMM8, SAE)
+}
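+
+// Illustrative usage (hypothetical `a` and `b`): both the comparison predicate and the
+// SAE control are const generics, supplied with turbofish syntax:
+//
+//     let eq = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+//     // `eq` is 1 when the lower elements compare equal, and 0 otherwise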
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and return the boolean result (0 or 1).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_comi_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return
+/// the boolean result (0 or 1).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_EQ_OS>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
+/// and return the boolean result (0 or 1).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_GE_OS>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
+/// the boolean result (0 or 1).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_GT_OS>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
+/// return the boolean result (0 or 1).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_LE_OS>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
+/// the boolean result (0 or 1).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_LT_OS>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
+/// the boolean result (0 or 1).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_NEQ_OS>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and
+/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_EQ_OQ>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
+/// and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_GE_OQ>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
+/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_GT_OQ>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
+/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_LE_OQ>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
+/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_LT_OQ>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
+/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 {
+ _mm_comi_sh::<_CMP_NEQ_OQ>(a, b)
+}
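+
+// Illustrative note (hypothetical `a` and `nan`, where lane 0 of `nan` is a quiet NaN):
+// the `_mm_comi*_sh` helpers use signaling predicates (`_CMP_*_OS`), while the
+// `_mm_ucomi*_sh` helpers use quiet ones (`_CMP_*_OQ`):
+//
+//     let s = _mm_comieq_sh(a, nan);  // returns 0 and signals an invalid-operation exception
+//     let q = _mm_ucomieq_sh(a, nan); // returns 0 without signaling for the quiet NaN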
+
+/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
+/// a new vector. The address must be aligned to 16 bytes or a general-protection exception may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h {
+ *mem_addr.cast()
+}
+
+/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
+/// a new vector. The address must be aligned to 32 bytes or a general-protection exception may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h {
+ *mem_addr.cast()
+}
+
+/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
+/// a new vector. The address must be aligned to 64 bytes or a general-protection exception may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h {
+ *mem_addr.cast()
+}
+
+/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector,
+/// and zero the upper elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h {
+ _mm_set_sh(*mem_addr)
+}
+
+/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
+/// using writemask k (the element is copied from src when mask bit 0 is not set), and zero the upper elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,sse,avx512f")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_load_sh(src: __m128h, k: __mmask8, mem_addr: *const f16) -> __m128h {
+ let mut dst = src;
+ asm!(
+ vpl!("vmovsh {dst}{{{k}}}"),
+ dst = inout(xmm_reg) dst,
+ k = in(kreg) k,
+ p = in(reg) mem_addr,
+ options(pure, readonly, nostack, preserves_flags)
+ );
+ dst
+}
+
+/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
+/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and zero the upper elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,sse,avx512f")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_load_sh(k: __mmask8, mem_addr: *const f16) -> __m128h {
+ let mut dst: __m128h;
+ asm!(
+ vpl!("vmovsh {dst}{{{k}}}{{z}}"),
+ dst = out(xmm_reg) dst,
+ k = in(kreg) k,
+ p = in(reg) mem_addr,
+ options(pure, readonly, nostack, preserves_flags)
+ );
+ dst
+}
+
+/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
+/// a new vector. The address does not need to be aligned to any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h {
+ ptr::read_unaligned(mem_addr.cast())
+}
+
+/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
+/// a new vector. The address does not need to be aligned to any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h {
+ ptr::read_unaligned(mem_addr.cast())
+}
+
+/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
+/// a new vector. The address does not need to be aligned to any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h {
+ ptr::read_unaligned(mem_addr.cast())
+}
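+
+// Illustrative sketch (assumes a plain `[f16; 8]` buffer, which is only 2-byte aligned):
+//
+//     let data: [f16; 8] = [0.0; 8];
+//     let v = _mm_loadu_ph(data.as_ptr()); // works for any alignment
+//     // `_mm_load_ph` would additionally require `data` to be 16-byte aligned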
+
+/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
+/// using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper
+/// 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ let mut mov: f16 = simd_extract!(src, 0);
+ if (k & 1) != 0 {
+ mov = simd_extract!(b, 0);
+ }
+ simd_insert!(a, 0, mov)
+}
+
+/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
+/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ let mut mov: f16 = 0.;
+ if (k & 1) != 0 {
+ mov = simd_extract!(b, 0);
+ }
+ simd_insert!(a, 0, mov)
+}
+
+/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst,
+/// and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h {
+ let mov: f16 = simd_extract!(b, 0);
+ simd_insert!(a, 0, mov)
+}
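+
+// Illustrative note (hypothetical `a` and `b`): only lane 0 is taken from `b`; the
+// remaining lanes come from `a`:
+//
+//     let r = _mm_move_sh(a, b);
+//     // r = [ b[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7] ]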
+
+/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
+/// The address must be aligned to 16 bytes or a general-protection exception may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) {
+ *mem_addr.cast() = a;
+}
+
+/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
+/// The address must be aligned to 32 bytes or a general-protection exception may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) {
+ *mem_addr.cast() = a;
+}
+
+/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
+/// The address must be aligned to 64 bytes or a general-protection exception may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) {
+ *mem_addr.cast() = a;
+}
+
+/// Store the lower half-precision (16-bit) floating-point element from a into memory.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) {
+ *mem_addr = simd_extract!(a, 0);
+}
+
+/// Store the lower half-precision (16-bit) floating-point element from a into memory using writemask k
+/// (the element is not stored when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,sse,avx512f")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_store_sh(mem_addr: *mut f16, k: __mmask8, a: __m128h) {
+ asm!(
+ vps!("vmovdqu16", "{{{k}}}, {src}"),
+ p = in(reg) mem_addr,
+ k = in(kreg) k,
+ src = in(xmm_reg) a,
+ options(nostack, preserves_flags)
+ );
+}
+
+/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
+/// The address does not need to be aligned to any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) {
+ ptr::write_unaligned(mem_addr.cast(), a);
+}
+
+/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
+/// The address does not need to be aligned to any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) {
+ ptr::write_unaligned(mem_addr.cast(), a);
+}
+
+/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
+/// The address does not need to be aligned to any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) {
+ ptr::write_unaligned(mem_addr.cast(), a);
+}
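+
+// Illustrative sketch (hypothetical `v`): the unaligned store/load pair round-trips a
+// vector through an ordinary f16 buffer with no alignment requirement:
+//
+//     let mut buf: [f16; 8] = [0.0; 8];
+//     _mm_storeu_ph(buf.as_mut_ptr(), v);
+//     let back = _mm_loadu_ph(buf.as_ptr()); // `back` has the same lanes as `v`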
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h {
+ simd_add(a, b)
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ let r = _mm_add_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ let r = _mm_add_ph(a, b);
+ simd_select_bitmask(k, r, _mm_setzero_ph())
+}
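+
+// Illustrative note (hypothetical `src`, `a`, `b`): bit i of the mask selects lane i of
+// the sum; unselected lanes come from `src` in the `mask` form and are zeroed in `maskz`:
+//
+//     let m = _mm_mask_add_ph(src, 0b0000_0101, a, b); // lanes 0 and 2 = a+b, rest = src
+//     let z = _mm_maskz_add_ph(0b0000_0101, a, b);     // lanes 0 and 2 = a+b, rest = 0.0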
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h {
+ simd_add(a, b)
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ let r = _mm256_add_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ let r = _mm256_add_ph(a, b);
+ simd_select_bitmask(k, r, _mm256_setzero_ph())
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h {
+ simd_add(a, b)
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ let r = _mm512_add_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ let r = _mm512_add_ph(a, b);
+ simd_select_bitmask(k, r, _mm512_setzero_ph())
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vaddph(a, b, ROUNDING)
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_add_round_ph<const ROUNDING: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ let r = _mm512_add_round_ph::<ROUNDING>(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_add_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    let r = _mm512_add_round_ph::<ROUNDING>(a, b);
+ simd_select_bitmask(k, r, _mm512_setzero_ph())
+}
+
+/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_add_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_add_round_sh<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vaddsh(a, b, src, k, ROUNDING)
+}
+
+/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_add_round_sh<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_add_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vaddsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_maskz_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h {
+ simd_sub(a, b)
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ let r = _mm_sub_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ let r = _mm_sub_ph(a, b);
+ simd_select_bitmask(k, r, _mm_setzero_ph())
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h {
+ simd_sub(a, b)
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ let r = _mm256_sub_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ let r = _mm256_sub_ph(a, b);
+ simd_select_bitmask(k, r, _mm256_setzero_ph())
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h {
+ simd_sub(a, b)
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ let r = _mm512_sub_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ let r = _mm512_sub_ph(a, b);
+ simd_select_bitmask(k, r, _mm512_setzero_ph())
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vsubph(a, b, ROUNDING)
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_sub_round_ph<const ROUNDING: i32>(
+    src: __m512h,
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    let r = _mm512_sub_round_ph::<ROUNDING>(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    let r = _mm512_sub_round_ph::<ROUNDING>(a, b);
+ simd_select_bitmask(k, r, _mm512_setzero_ph())
+}
+
+/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_sub_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_sub_round_sh<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vsubsh(a, b, src, k, ROUNDING)
+}
+
+/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_sub_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsubsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_maskz_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h {
+ simd_mul(a, b)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ let r = _mm_mul_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ let r = _mm_mul_ph(a, b);
+ simd_select_bitmask(k, r, _mm_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h {
+ simd_mul(a, b)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ let r = _mm256_mul_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ let r = _mm256_mul_ph(a, b);
+ simd_select_bitmask(k, r, _mm256_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h {
+ simd_mul(a, b)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ let r = _mm512_mul_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ let r = _mm512_mul_ph(a, b);
+ simd_select_bitmask(k, r, _mm512_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vmulph(a, b, ROUNDING)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_mul_round_ph<const ROUNDING: i32>(
+    src: __m512h,
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    let r = _mm512_mul_round_ph::<ROUNDING>(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    let r = _mm512_mul_round_ph::<ROUNDING>(a, b);
+ simd_select_bitmask(k, r, _mm512_setzero_ph())
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_mul_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_mul_round_sh<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vmulsh(a, b, src, k, ROUNDING)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_mul_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmulsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_maskz_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h {
+ simd_div(a, b)
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ let r = _mm_div_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ let r = _mm_div_ph(a, b);
+ simd_select_bitmask(k, r, _mm_setzero_ph())
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h {
+ simd_div(a, b)
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ let r = _mm256_div_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ let r = _mm256_div_ph(a, b);
+ simd_select_bitmask(k, r, _mm256_setzero_ph())
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h {
+ simd_div(a, b)
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ let r = _mm512_div_ph(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ let r = _mm512_div_ph(a, b);
+ simd_select_bitmask(k, r, _mm512_setzero_ph())
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vdivph(a, b, ROUNDING)
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
+/// writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_div_round_ph<const ROUNDING: i32>(
+    src: __m512h,
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    let r = _mm512_div_round_ph::<ROUNDING>(a, b);
+ simd_select_bitmask(k, r, src)
+}
+
+/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
+/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_div_round_ph<const ROUNDING: i32>(
+    k: __mmask32,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    let r = _mm512_div_round_ph::<ROUNDING>(a, b);
+ simd_select_bitmask(k, r, _mm512_setzero_ph())
+}
+
+/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_div_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_div_round_sh<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vdivsh(a, b, src, k, ROUNDING)
+}
+
+/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_div_round_sh<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_div_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
+/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vdivsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_maskz_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
+ fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
+
+ #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
+ fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
+ fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
+ fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
+ fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
+ fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
+ fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
+ fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
+ fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::x86::*;
+ use crate::mem::transmute;
+ use crate::ptr::{addr_of, addr_of_mut};
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_ph() {
+ let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set_ph() {
+ let r = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_setr_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set_ph() {
+ let r = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_setr_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_sh() {
+ let r = _mm_set_sh(1.0);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set1_ph() {
+ let r = _mm_set1_ph(1.0);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set1_ph() {
+ let r = _mm256_set1_ph(1.0);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set1_ph() {
+ let r = _mm512_set1_ph(1.0);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_setr_ph() {
+ let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_setr_ph() {
+ let r = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setr_ph() {
+ let r = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_setzero_ph() {
+ let r = _mm_setzero_ph();
+ let e = _mm_set1_ph(0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_setzero_ph() {
+ let r = _mm256_setzero_ph();
+ let e = _mm256_set1_ph(0.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setzero_ph() {
+ let r = _mm512_setzero_ph();
+ let e = _mm512_set1_ph(0.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castsi128_ph() {
+ let a = _mm_set1_epi16(0x3c00);
+ let r = _mm_castsi128_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castsi256_ph() {
+ let a = _mm256_set1_epi16(0x3c00);
+ let r = _mm256_castsi256_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castsi512_ph() {
+ let a = _mm512_set1_epi16(0x3c00);
+ let r = _mm512_castsi512_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_si128() {
+ let a = _mm_set1_ph(1.0);
+ let r = _mm_castph_si128(a);
+ let e = _mm_set1_epi16(0x3c00);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_si256() {
+ let a = _mm256_set1_ph(1.0);
+ let r = _mm256_castph_si256(a);
+ let e = _mm256_set1_epi16(0x3c00);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_si512() {
+ let a = _mm512_set1_ph(1.0);
+ let r = _mm512_castph_si512(a);
+ let e = _mm512_set1_epi16(0x3c00);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castps_ph() {
+ let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
+ let r = _mm_castps_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castps_ph() {
+ let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castps_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castps_ph() {
+ let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castps_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_ps() {
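+ // 0x3f800000 is the binary32 bit pattern for 1.0; casting those bits back to
+ // __m128 should give 1.0 in every f32 lane.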
+ let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
+ let r = _mm_castph_ps(a);
+ let e = _mm_set1_ps(1.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_ps() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
+ let r = _mm256_castph_ps(a);
+ let e = _mm256_set1_ps(1.0);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_ps() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
+ let r = _mm512_castph_ps(a);
+ let e = _mm512_set1_ps(1.0);
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castpd_ph() {
+ let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
+ let r = _mm_castpd_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castpd_ph() {
+ let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castpd_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castpd_ph() {
+ let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castpd_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_pd() {
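+ // 0x3ff0000000000000 is the binary64 bit pattern for 1.0; casting those bits
+ // back to __m128d should give 1.0 in every f64 lane.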
+ let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
+ let r = _mm_castph_pd(a);
+ let e = _mm_set1_pd(1.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_pd() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
+ let r = _mm256_castph_pd(a);
+ let e = _mm256_set1_pd(1.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_pd() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
+ let r = _mm512_castph_pd(a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph256_ph128() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm256_castph256_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph128() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph256() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_castph128_ph256(a);
+ assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_castph128_ph512(a);
+ assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_castph256_ph512(a);
+ assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_zextph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_zextph128_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_zextph128_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_zextph256_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_round_sh() {
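+ // _CMP_EQ_OQ is the ordered, quiet (non-signalling) equality predicate and
+ // _MM_FROUND_NO_EXC suppresses exceptions, so comparing equal operands should return 1.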
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comige_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comile_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comilt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomige_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomile_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomilt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_load_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_load_ph(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_load_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_load_ph(addr_of!(a).cast());
+ assert_eq_m256h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_load_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_load_ph(addr_of!(a).cast());
+ assert_eq_m512h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_load_sh(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let src = _mm_set_sh(2.);
+ let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
+ assert_eq_m128h(src, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_loadu_ph() {
+ let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
+ let r = _mm_loadu_ph(array.as_ptr());
+ let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ ];
+ let r = _mm256_loadu_ph(array.as_ptr());
+ let e = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ ];
+ let r = _mm512_loadu_ph(array.as_ptr());
+ let e = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_move_sh() {
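+ // _mm_move_sh copies the lowest f16 lane from `b` and the upper seven lanes from `a`.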
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_move_sh(a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let src = _mm_set_sh(10.0);
+ let r = _mm_mask_move_sh(src, 0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_maskz_move_sh(0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_store_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_store_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let mut b = _mm256_setzero_ph();
+ _mm256_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m256h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_store_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let mut b = _mm512_setzero_ph();
+ _mm512_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m512h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_sh(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_storeu_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut array = [0.0; 8];
+ _mm_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_storeu_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let mut array = [0.0; 16];
+ _mm256_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_storeu_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let mut array = [0.0; 32];
+ _mm512_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_add_ph(a, b);
+ let e = _mm_set1_ph(9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_add_ph() {
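+ // Mask bit 0 corresponds to the lowest lane (the last argument of _mm_set_ph):
+ // set bits take a + b, clear bits keep the corresponding lane of `src`.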
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_add_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_add_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_add_ph(a, b);
+ let e = _mm256_set1_ph(17.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_add_ph(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_add_round_ph() {
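+ // _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest-even with
+ // exceptions suppressed; the same rounding argument is used for the other *_round_* tests below.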
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_sh(a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_sh(src, 1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_add_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_add_sh(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_sub_ph(a, b);
+ let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_sub_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_sub_ph(a, b);
+ let e = _mm256_set_ph(
+ -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
+ 15.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_ph(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_sub_sh(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_sh(src, 1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_sub_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_sub_sh(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_mul_ph(a, b);
+ let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_mul_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_mul_ph(a, b);
+ let e = _mm256_set_ph(
+ 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
+ 30.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_mul_ph(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_mul_sh(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_sh(src, 1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_mul_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_mul_sh(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_div_ph(a, b);
+ let e = _mm_set1_ph(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
+ let r = _mm_mask_div_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_maskz_div_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_div_ph(a, b);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let src = _mm256_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0,
+ );
+ let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_ph(a, b);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let src = _mm512_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
+ );
+ let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let src = _mm512_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
+ );
+ let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_div_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_div_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_div_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_div_sh(a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_div_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_div_sh(src, 1, a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_div_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_div_sh(1, a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+}
diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs
index d3d4381cc7..6f10d828dd 100644
--- a/crates/core_arch/src/x86/mod.rs
+++ b/crates/core_arch/src/x86/mod.rs
@@ -1004,3 +1004,7 @@ pub use self::avx512bf16::*;
mod avxneconvert;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub use self::avxneconvert::*;
+
+mod avx512fp16;
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub use self::avx512fp16::*;
From bf92f837fcdda6dac7d12332bcfa758c1f227f56 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Mon, 8 Jul 2024 20:00:07 +0530
Subject: [PATCH 03/11] AVX512FP16 Part 2: Complex Multiplication
---
crates/core_arch/missing-x86.md | 76 -
crates/core_arch/src/x86/avx512fp16.rs | 4591 ++++++++++++++++++------
2 files changed, 3431 insertions(+), 1236 deletions(-)
diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index 7bc2456ddd..c66e1e728c 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -58,8 +58,6 @@
* [ ] [`_mm512_abs_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph)
* [ ] [`_mm512_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
* [ ] [`_mm512_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
- * [ ] [`_mm512_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch)
- * [ ] [`_mm512_cmul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_round_pch)
* [ ] [`_mm512_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch)
* [ ] [`_mm512_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph)
* [ ] [`_mm512_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph)
@@ -108,8 +106,6 @@
* [ ] [`_mm512_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
* [ ] [`_mm512_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch)
* [ ] [`_mm512_fcmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch)
- * [ ] [`_mm512_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch)
- * [ ] [`_mm512_fcmul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_round_pch)
* [ ] [`_mm512_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch)
* [ ] [`_mm512_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph)
* [ ] [`_mm512_fmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch)
@@ -120,8 +116,6 @@
* [ ] [`_mm512_fmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph)
* [ ] [`_mm512_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph)
* [ ] [`_mm512_fmsubadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph)
- * [ ] [`_mm512_fmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_pch)
- * [ ] [`_mm512_fmul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_round_pch)
* [ ] [`_mm512_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph)
* [ ] [`_mm512_fnmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph)
* [ ] [`_mm512_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph)
@@ -150,8 +144,6 @@
* [ ] [`_mm512_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
* [ ] [`_mm512_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
* [ ] [`_mm512_mask_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
- * [ ] [`_mm512_mask_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_pch)
- * [ ] [`_mm512_mask_cmul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_round_pch)
* [ ] [`_mm512_mask_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch)
* [ ] [`_mm512_mask_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph)
* [ ] [`_mm512_mask_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph)
@@ -199,8 +191,6 @@
* [ ] [`_mm512_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
* [ ] [`_mm512_mask_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch)
* [ ] [`_mm512_mask_fcmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch)
- * [ ] [`_mm512_mask_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch)
- * [ ] [`_mm512_mask_fcmul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_round_pch)
* [ ] [`_mm512_mask_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch)
* [ ] [`_mm512_mask_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph)
* [ ] [`_mm512_mask_fmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch)
@@ -211,8 +201,6 @@
* [ ] [`_mm512_mask_fmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph)
* [ ] [`_mm512_mask_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph)
* [ ] [`_mm512_mask_fmsubadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph)
- * [ ] [`_mm512_mask_fmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch)
- * [ ] [`_mm512_mask_fmul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_round_pch)
* [ ] [`_mm512_mask_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph)
* [ ] [`_mm512_mask_fnmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph)
* [ ] [`_mm512_mask_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph)
@@ -226,8 +214,6 @@
* [ ] [`_mm512_mask_max_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph)
* [ ] [`_mm512_mask_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph)
* [ ] [`_mm512_mask_min_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph)
- * [ ] [`_mm512_mask_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch)
- * [ ] [`_mm512_mask_mul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch)
* [ ] [`_mm512_mask_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph)
* [ ] [`_mm512_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
* [ ] [`_mm512_mask_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
@@ -238,8 +224,6 @@
* [ ] [`_mm512_mask_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
* [ ] [`_mm512_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph)
* [ ] [`_mm512_mask_sqrt_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph)
- * [ ] [`_mm512_maskz_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch)
- * [ ] [`_mm512_maskz_cmul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch)
* [ ] [`_mm512_maskz_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch)
* [ ] [`_mm512_maskz_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
* [ ] [`_mm512_maskz_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
@@ -287,8 +271,6 @@
* [ ] [`_mm512_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
* [ ] [`_mm512_maskz_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch)
* [ ] [`_mm512_maskz_fcmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch)
- * [ ] [`_mm512_maskz_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch)
- * [ ] [`_mm512_maskz_fcmul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_round_pch)
* [ ] [`_mm512_maskz_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch)
* [ ] [`_mm512_maskz_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph)
* [ ] [`_mm512_maskz_fmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch)
@@ -299,8 +281,6 @@
* [ ] [`_mm512_maskz_fmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph)
* [ ] [`_mm512_maskz_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph)
* [ ] [`_mm512_maskz_fmsubadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph)
- * [ ] [`_mm512_maskz_fmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch)
- * [ ] [`_mm512_maskz_fmul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_round_pch)
* [ ] [`_mm512_maskz_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph)
* [ ] [`_mm512_maskz_fnmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph)
* [ ] [`_mm512_maskz_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph)
@@ -313,8 +293,6 @@
* [ ] [`_mm512_maskz_max_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph)
* [ ] [`_mm512_maskz_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph)
* [ ] [`_mm512_maskz_min_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph)
- * [ ] [`_mm512_maskz_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch)
- * [ ] [`_mm512_maskz_mul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch)
* [ ] [`_mm512_maskz_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph)
* [ ] [`_mm512_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
* [ ] [`_mm512_maskz_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
@@ -329,8 +307,6 @@
* [ ] [`_mm512_max_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph)
* [ ] [`_mm512_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph)
* [ ] [`_mm512_min_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph)
- * [ ] [`_mm512_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch)
- * [ ] [`_mm512_mul_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch)
* [ ] [`_mm512_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
* [ ] [`_mm512_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
* [ ] [`_mm512_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph)
@@ -348,10 +324,6 @@
* [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
* [ ] [`_mm512_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph)
* [ ] [`_mm512_sqrt_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph)
- * [ ] [`_mm_cmp_round_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask)
- * [ ] [`_mm_cmp_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask)
- * [ ] [`_mm_cmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch)
- * [ ] [`_mm_cmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch)
* [ ] [`_mm_cvt_roundi32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
* [ ] [`_mm_cvt_roundi64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
* [ ] [`_mm_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
@@ -389,16 +361,12 @@
* [ ] [`_mm_cvtu64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh)
* [ ] [`_mm_fcmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch)
* [ ] [`_mm_fcmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch)
- * [ ] [`_mm_fcmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch)
- * [ ] [`_mm_fcmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_sch)
* [ ] [`_mm_fmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch)
* [ ] [`_mm_fmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh)
* [ ] [`_mm_fmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch)
* [ ] [`_mm_fmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh)
* [ ] [`_mm_fmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh)
* [ ] [`_mm_fmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh)
- * [ ] [`_mm_fmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_round_sch)
- * [ ] [`_mm_fmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_sch)
* [ ] [`_mm_fnmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh)
* [ ] [`_mm_fnmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh)
* [ ] [`_mm_fnmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh)
@@ -420,10 +388,6 @@
* [ ] [`_mm_mask3_fnmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh)
* [ ] [`_mm_mask3_fnmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh)
* [ ] [`_mm_mask3_fnmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh)
- * [ ] [`_mm_mask_cmp_round_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask)
- * [ ] [`_mm_mask_cmp_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask)
- * [ ] [`_mm_mask_cmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch)
- * [ ] [`_mm_mask_cmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_sch)
* [ ] [`_mm_mask_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
* [ ] [`_mm_mask_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
* [ ] [`_mm_mask_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
@@ -434,16 +398,12 @@
* [ ] [`_mm_mask_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
* [ ] [`_mm_mask_fcmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch)
* [ ] [`_mm_mask_fcmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch)
- * [ ] [`_mm_mask_fcmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch)
- * [ ] [`_mm_mask_fcmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_sch)
* [ ] [`_mm_mask_fmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch)
* [ ] [`_mm_mask_fmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh)
* [ ] [`_mm_mask_fmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch)
* [ ] [`_mm_mask_fmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh)
* [ ] [`_mm_mask_fmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh)
* [ ] [`_mm_mask_fmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh)
- * [ ] [`_mm_mask_fmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_round_sch)
- * [ ] [`_mm_mask_fmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_sch)
* [ ] [`_mm_mask_fnmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh)
* [ ] [`_mm_mask_fnmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh)
* [ ] [`_mm_mask_fnmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh)
@@ -453,8 +413,6 @@
* [ ] [`_mm_mask_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
* [ ] [`_mm_mask_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
* [ ] [`_mm_mask_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
- * [ ] [`_mm_mask_mul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch)
- * [ ] [`_mm_mask_mul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch)
* [ ] [`_mm_mask_rcp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh)
* [ ] [`_mm_mask_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
* [ ] [`_mm_mask_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
@@ -465,8 +423,6 @@
* [ ] [`_mm_mask_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
* [ ] [`_mm_mask_sqrt_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh)
* [ ] [`_mm_mask_sqrt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh)
- * [ ] [`_mm_maskz_cmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch)
- * [ ] [`_mm_maskz_cmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch)
* [ ] [`_mm_maskz_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
* [ ] [`_mm_maskz_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
* [ ] [`_mm_maskz_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
@@ -477,16 +433,12 @@
* [ ] [`_mm_maskz_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
* [ ] [`_mm_maskz_fcmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch)
* [ ] [`_mm_maskz_fcmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch)
- * [ ] [`_mm_maskz_fcmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch)
- * [ ] [`_mm_maskz_fcmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_sch)
* [ ] [`_mm_maskz_fmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch)
* [ ] [`_mm_maskz_fmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh)
* [ ] [`_mm_maskz_fmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch)
* [ ] [`_mm_maskz_fmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh)
* [ ] [`_mm_maskz_fmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh)
* [ ] [`_mm_maskz_fmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh)
- * [ ] [`_mm_maskz_fmul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_round_sch)
- * [ ] [`_mm_maskz_fmul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_sch)
* [ ] [`_mm_maskz_fnmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh)
* [ ] [`_mm_maskz_fnmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh)
* [ ] [`_mm_maskz_fnmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh)
@@ -495,8 +447,6 @@
* [ ] [`_mm_maskz_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
* [ ] [`_mm_maskz_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
* [ ] [`_mm_maskz_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
- * [ ] [`_mm_maskz_mul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch)
- * [ ] [`_mm_maskz_mul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch)
* [ ] [`_mm_maskz_rcp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh)
* [ ] [`_mm_maskz_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
* [ ] [`_mm_maskz_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
@@ -507,8 +457,6 @@
* [ ] [`_mm_maskz_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
* [ ] [`_mm_maskz_sqrt_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh)
* [ ] [`_mm_maskz_sqrt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh)
- * [ ] [`_mm_mul_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch)
- * [ ] [`_mm_mul_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch)
* [ ] [`_mm_rcp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh)
* [ ] [`_mm_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
* [ ] [`_mm_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
@@ -527,7 +475,6 @@
* [ ] [`_mm256_abs_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph)
* [ ] [`_mm256_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
- * [ ] [`_mm256_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch)
* [ ] [`_mm256_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch)
* [ ] [`_mm256_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph)
* [ ] [`_mm256_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph)
@@ -552,13 +499,11 @@
* [ ] [`_mm256_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
* [ ] [`_mm256_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
* [ ] [`_mm256_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch)
- * [ ] [`_mm256_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch)
* [ ] [`_mm256_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch)
* [ ] [`_mm256_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph)
* [ ] [`_mm256_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph)
* [ ] [`_mm256_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph)
* [ ] [`_mm256_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph)
- * [ ] [`_mm256_fmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmul_pch)
* [ ] [`_mm256_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph)
* [ ] [`_mm256_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph)
* [ ] [`_mm256_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
@@ -574,7 +519,6 @@
* [ ] [`_mm256_mask3_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph)
* [ ] [`_mm256_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
* [ ] [`_mm256_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
- * [ ] [`_mm256_mask_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch)
* [ ] [`_mm256_mask_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch)
* [ ] [`_mm256_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
* [ ] [`_mm256_mask_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph)
@@ -599,13 +543,11 @@
* [ ] [`_mm256_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
* [ ] [`_mm256_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
* [ ] [`_mm256_mask_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch)
- * [ ] [`_mm256_mask_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch)
* [ ] [`_mm256_mask_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch)
* [ ] [`_mm256_mask_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph)
* [ ] [`_mm256_mask_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph)
* [ ] [`_mm256_mask_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph)
* [ ] [`_mm256_mask_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph)
- * [ ] [`_mm256_mask_fmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch)
* [ ] [`_mm256_mask_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph)
* [ ] [`_mm256_mask_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph)
* [ ] [`_mm256_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
@@ -613,14 +555,12 @@
* [ ] [`_mm256_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
* [ ] [`_mm256_mask_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph)
* [ ] [`_mm256_mask_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph)
- * [ ] [`_mm256_mask_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch)
* [ ] [`_mm256_mask_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph)
* [ ] [`_mm256_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
* [ ] [`_mm256_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
* [ ] [`_mm256_mask_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph)
* [ ] [`_mm256_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
* [ ] [`_mm256_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph)
- * [ ] [`_mm256_maskz_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch)
* [ ] [`_mm256_maskz_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch)
* [ ] [`_mm256_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
* [ ] [`_mm256_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
@@ -645,20 +585,17 @@
* [ ] [`_mm256_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
* [ ] [`_mm256_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
* [ ] [`_mm256_maskz_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch)
- * [ ] [`_mm256_maskz_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch)
* [ ] [`_mm256_maskz_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch)
* [ ] [`_mm256_maskz_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph)
* [ ] [`_mm256_maskz_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph)
* [ ] [`_mm256_maskz_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph)
* [ ] [`_mm256_maskz_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph)
- * [ ] [`_mm256_maskz_fmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch)
* [ ] [`_mm256_maskz_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph)
* [ ] [`_mm256_maskz_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph)
* [ ] [`_mm256_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
* [ ] [`_mm256_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
* [ ] [`_mm256_maskz_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph)
* [ ] [`_mm256_maskz_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph)
- * [ ] [`_mm256_maskz_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch)
* [ ] [`_mm256_maskz_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph)
* [ ] [`_mm256_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
* [ ] [`_mm256_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
@@ -667,7 +604,6 @@
* [ ] [`_mm256_maskz_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph)
* [ ] [`_mm256_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph)
* [ ] [`_mm256_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph)
- * [ ] [`_mm256_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch)
* [ ] [`_mm256_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
* [ ] [`_mm256_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
* [ ] [`_mm256_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph)
@@ -682,7 +618,6 @@
* [ ] [`_mm256_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph)
* [ ] [`_mm_abs_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph)
* [ ] [`_mm_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
- * [ ] [`_mm_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch)
* [ ] [`_mm_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch)
* [ ] [`_mm_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
* [ ] [`_mm_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
@@ -707,13 +642,11 @@
* [ ] [`_mm_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
* [ ] [`_mm_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
* [ ] [`_mm_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch)
- * [ ] [`_mm_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch)
* [ ] [`_mm_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch)
* [ ] [`_mm_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph)
* [ ] [`_mm_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph)
* [ ] [`_mm_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph)
* [ ] [`_mm_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph)
- * [ ] [`_mm_fmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_pch)
* [ ] [`_mm_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph)
* [ ] [`_mm_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph)
* [ ] [`_mm_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
@@ -729,7 +662,6 @@
* [ ] [`_mm_mask3_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph)
* [ ] [`_mm_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
* [ ] [`_mm_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
- * [ ] [`_mm_mask_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch)
* [ ] [`_mm_mask_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch)
* [ ] [`_mm_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
* [ ] [`_mm_mask_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph)
@@ -754,13 +686,11 @@
* [ ] [`_mm_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
* [ ] [`_mm_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
* [ ] [`_mm_mask_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch)
- * [ ] [`_mm_mask_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch)
* [ ] [`_mm_mask_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch)
* [ ] [`_mm_mask_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph)
* [ ] [`_mm_mask_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph)
* [ ] [`_mm_mask_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph)
* [ ] [`_mm_mask_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph)
- * [ ] [`_mm_mask_fmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch)
* [ ] [`_mm_mask_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph)
* [ ] [`_mm_mask_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph)
* [ ] [`_mm_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
@@ -772,14 +702,12 @@
* [ ] [`_mm_mask_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph)
* [ ] [`_mm_mask_min_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh)
* [ ] [`_mm_mask_min_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh)
- * [ ] [`_mm_mask_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch)
* [ ] [`_mm_mask_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph)
* [ ] [`_mm_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
* [ ] [`_mm_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
* [ ] [`_mm_mask_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph)
* [ ] [`_mm_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
* [ ] [`_mm_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph)
- * [ ] [`_mm_maskz_cmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch)
* [ ] [`_mm_maskz_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch)
* [ ] [`_mm_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
* [ ] [`_mm_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
@@ -804,13 +732,11 @@
* [ ] [`_mm_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
* [ ] [`_mm_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
* [ ] [`_mm_maskz_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch)
- * [ ] [`_mm_maskz_fcmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch)
* [ ] [`_mm_maskz_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch)
* [ ] [`_mm_maskz_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph)
* [ ] [`_mm_maskz_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph)
* [ ] [`_mm_maskz_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph)
* [ ] [`_mm_maskz_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph)
- * [ ] [`_mm_maskz_fmul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch)
* [ ] [`_mm_maskz_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph)
* [ ] [`_mm_maskz_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph)
* [ ] [`_mm_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
@@ -821,7 +747,6 @@
* [ ] [`_mm_maskz_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph)
* [ ] [`_mm_maskz_min_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh)
* [ ] [`_mm_maskz_min_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh)
- * [ ] [`_mm_maskz_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch)
* [ ] [`_mm_maskz_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph)
* [ ] [`_mm_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
* [ ] [`_mm_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
@@ -834,7 +759,6 @@
* [ ] [`_mm_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph)
* [ ] [`_mm_min_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh)
* [ ] [`_mm_min_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh)
- * [ ] [`_mm_mul_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch)
* [ ] [`_mm_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
* [ ] [`_mm_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
* [ ] [`_mm_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph)
diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs
index c6eeff1904..a2a31d87e9 100644
--- a/crates/core_arch/src/x86/avx512fp16.rs
+++ b/crates/core_arch/src/x86/avx512fp16.rs
@@ -615,6 +615,69 @@ pub unsafe fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
)
}
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the result in mask vector k. Exceptions can be suppressed by
+/// passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cmp_round_sh_mask<const IMM8: i32, const SAE: i32>(
+    a: __m128h,
+    b: __m128h,
+) -> __mmask8 {
+    static_assert_sae!(SAE);
+    _mm_mask_cmp_round_sh_mask::<IMM8, SAE>(0xff, a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the result in mask vector k using zeromask k1. Exceptions can be
+/// suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cmp_round_sh_mask<const IMM8: i32, const SAE: i32>(
+    k1: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __mmask8 {
+    static_assert_sae!(SAE);
+    vcmpsh(a, b, IMM8, k1, SAE)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the result in mask vector k.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cmp_sh_mask<const IMM8: i32>(a: __m128h, b: __m128h) -> __mmask8 {
+    _mm_cmp_round_sh_mask::<IMM8, _MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the result in mask vector k using zeromask k1.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cmp_sh_mask<const IMM8: i32>(
+    k1: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __mmask8 {
+    _mm_mask_cmp_round_sh_mask::<IMM8, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
+}
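+
+// A minimal usage sketch (illustrative only): `IMM8` takes the same `_CMP_*` predicates as the
+// other vector compares (here `_CMP_LT_OQ`, assumed to be in scope from the `avx` module), and
+// only bit 0 of the returned mask is meaningful for the scalar form.
+#[allow(dead_code)]
+#[target_feature(enable = "avx512fp16")]
+unsafe fn example_cmp_sh_mask() -> bool {
+    let a = _mm_set_sh(1.0);
+    let b = _mm_set_sh(2.0);
+    // Ordered, non-signaling "less than": 1.0 < 2.0, so mask bit 0 is set.
+    (_mm_cmp_sh_mask::<_CMP_LT_OQ>(a, b) & 1) == 1
+}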
+
/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
@@ -1236,7 +1299,7 @@ pub unsafe fn _mm512_maskz_add_round_ph(
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
/// _MM_FROUND_CUR_DIRECTION
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_ph)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
@@ -2227,1778 +2290,3986 @@ pub unsafe fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
_mm_maskz_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
}
-#[allow(improper_ctypes)]
-extern "C" {
- #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
- fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
+/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_mul_pch(_mm_undefined_ph(), 0xff, a, b)
+}
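+
+// An illustrative sketch of what one complex element computes (names and values are chosen for
+// this example only): with the `complex = vec.fp16[0] + i * vec.fp16[1]` layout, each even/odd
+// pair of lanes follows (a0 + i*a1) * (b0 + i*b1) = (a0*b0 - a1*b1) + i*(a0*b1 + a1*b0).
+#[allow(dead_code)]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+unsafe fn example_mul_pch() -> __m128h {
+    // The lowest complex element of `a` is 1 + 2i and of `b` is 3 + 4i; the other lanes are zero.
+    let a = _mm_setr_ph(1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+    let b = _mm_setr_ph(3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+    // The lowest complex element of the product is (1*3 - 2*4) + i*(1*4 + 2*3) = -5 + 10i.
+    _mm_mul_pch(a, b)
+}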
- #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
- fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
- fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
- fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
- fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ transmute(vfmulcph_128(transmute(a), transmute(b), transmute(src), k))
+}
- #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
- fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
- fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
- fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
- fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_mul_pch(_mm_setzero_ph(), k, a, b)
+}
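+
+// A masking sketch (illustrative only): each bit of `k` selects one complex element, i.e. one
+// pair of adjacent `f16` lanes, so a `__m128h` holding 4 complex elements uses mask bits 0..=3.
+#[allow(dead_code)]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+unsafe fn example_masked_mul_pch(src: __m128h, a: __m128h, b: __m128h) -> (__m128h, __m128h) {
+    // Complex elements 0 and 2 are computed; elements 1 and 3 are taken from `src` here...
+    let merged = _mm_mask_mul_pch(src, 0b0101, a, b);
+    // ...and zeroed here.
+    let zeroed = _mm_maskz_mul_pch(0b0101, a, b);
+    (merged, zeroed)
+}
+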
+/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h {
+ _mm256_mask_mul_pch(_mm256_undefined_ph(), 0xff, a, b)
}
-#[cfg(test)]
-mod tests {
- use crate::core_arch::x86::*;
- use crate::mem::transmute;
- use crate::ptr::{addr_of, addr_of_mut};
- use stdarch_test::simd_test;
+/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
+ transmute(vfmulcph_256(transmute(a), transmute(b), transmute(src), k))
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_ph() {
- let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- assert_eq_m128h(r, e);
- }
+/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
+ _mm256_mask_mul_pch(_mm256_setzero_ph(), k, a, b)
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set_ph() {
- let r = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let e = _mm256_setr_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- assert_eq_m256h(r, e);
- }
+/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_mul_pch(_mm512_undefined_ph(), 0xffff, a, b)
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set_ph() {
- let r = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let e = _mm512_setr_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- assert_eq_m512h(r, e);
- }
+/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_sh() {
- let r = _mm_set_sh(1.0);
- let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
- assert_eq_m128h(r, e);
- }
+/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_mul_pch(_mm512_setzero_ph(), k, a, b)
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set1_ph() {
- let r = _mm_set1_ph(1.0);
- let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
- assert_eq_m128h(r, e);
- }
+/// Multiply the packed complex numbers in a and b, and store the results in dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
+}
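+
+// A rounding sketch (illustrative only): `ROUNDING` is one of the `_MM_FROUND_*` combinations
+// listed above, passed through the turbofish like the other `*_round_*` intrinsics in this module.
+#[allow(dead_code)]
+#[target_feature(enable = "avx512fp16")]
+unsafe fn example_mul_round_pch(a: __m512h, b: __m512h) -> __m512h {
+    // Round the 16 complex products toward zero and suppress floating-point exceptions.
+    _mm512_mul_round_pch::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
+}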
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set1_ph() {
- let r = _mm256_set1_ph(1.0);
- let e = _mm256_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- );
- assert_eq_m256h(r, e);
- }
+/// Multiply the packed complex numbers in a and b, and store the results in dst using writemask k (the element
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_mul_round_pch<const ROUNDING: i32>(
+ src: __m512h,
+ k: __mmask16,
+ a: __m512h,
+ b: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ transmute(vfmulcph_512(
+ transmute(a),
+ transmute(b),
+ transmute(src),
+ k,
+ ROUNDING,
+ ))
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set1_ph() {
- let r = _mm512_set1_ph(1.0);
- let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- );
- assert_eq_m512h(r, e);
- }
+/// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>(
+    k: __mmask16,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
+}
+
+/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
+/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_mul_sch(_mm_undefined_ph(), 0xff, a, b)
+}
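+
+// Informal sketch (not part of the intrinsic): only the lowest complex number
+// (elements 0 and 1) is multiplied; elements 2..=7 of the result are copied from `a`.
+// Assuming the `_mm_setr_ph` constructor defined earlier in this patch:
+//
+//     let a = _mm_setr_ph(1.0, 2.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0);
+//     let b = _mm_setr_ph(3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+//     let r = _mm_mul_sch(a, b);
+//     // expected: [-5.0, 10.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0]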
+
+/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
+/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
+/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_mul_sch(_mm_setzero_ph(), k, a, b)
+}
+
+/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
+/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_mul_round_sch::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
+/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
+/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_mul_round_sch<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ transmute(vfmulcsh(
+ transmute(a),
+ transmute(b),
+ transmute(src),
+ k,
+ ROUNDING,
+ ))
+}
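+
+// Usage sketch (not part of the intrinsic): the rounding mode is a const generic that
+// `rustc_legacy_const_generics` exposes positionally, so callers pick it at compile
+// time, e.g.
+//
+//     let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+//
+// `static_assert_rounding!` rejects any value that is not one of the rounding
+// constants listed in the documentation above.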
+
+/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
+/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
+/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_mul_round_sch<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_mul_round_sch::<ROUNDING>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mul_pch(a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_mul_pch(src, k, a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_maskz_mul_pch(k, a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h {
+ _mm256_mul_pch(a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
+ _mm256_mask_mul_pch(src, k, a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
+ _mm256_maskz_mul_pch(k, a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
+/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mul_pch(a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_mul_pch(src, k, a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_maskz_mul_pch(k, a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
+/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mul_round_pch::<ROUNDING>(a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmul_round_pch<const ROUNDING: i32>(
+    src: __m512h,
+    k: __mmask16,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_mul_round_pch::<ROUNDING>(src, k, a, b)
+}
+
+/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmul_round_pch<const ROUNDING: i32>(
+    k: __mmask16,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_maskz_mul_round_pch::<ROUNDING>(k, a, b)
+}
+
+/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mul_sch(a, b)
+}
+
+/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_mul_sch(src, k, a, b)
+}
+
+/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_maskz_mul_sch(k, a, b)
+}
+
+/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is composed
+/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mul_round_sch::<ROUNDING>(a, b)
+}
+
+/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmul_round_sch<const ROUNDING: i32>(
+    src: __m128h,
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_mul_round_sch::<ROUNDING>(src, k, a, b)
+}
+
+/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmul_round_sch<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_maskz_mul_round_sch::<ROUNDING>(k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_cmul_pch(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ transmute(vfcmulcph_128(transmute(a), transmute(b), transmute(src), k))
+}
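+
+// Illustrative sketch (not part of the intrinsic): `vfcmulcph` multiplies each complex
+// number in `a` by the conjugate of the corresponding complex number in `b`:
+//
+//     dst.re = a.re * b.re + a.im * b.im
+//     dst.im = a.im * b.re - a.re * b.im
+//
+// e.g. (1 + 2i) * conj(3 + 4i) = (1 + 2i) * (3 - 4i) = 11 + 2i. Assuming the
+// `_mm_setr_ph` constructor defined earlier in this patch:
+//
+//     let a = _mm_setr_ph(1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+//     let b = _mm_setr_ph(3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+//     let r = _mm_cmul_pch(a, b); // lowest complex lane: (11.0, 2.0)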
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_cmul_pch(_mm_setzero_ph(), k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h {
+ _mm256_mask_cmul_pch(_mm256_undefined_ph(), 0xff, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
+ transmute(vfcmulcph_256(transmute(a), transmute(b), transmute(src), k))
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
+ _mm256_mask_cmul_pch(_mm256_setzero_ph(), k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_cmul_pch(_mm512_undefined_ph(), 0xffff, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_cmul_pch(_mm512_setzero_ph(), k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>(
+ src: __m512h,
+ k: __mmask16,
+ a: __m512h,
+ b: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ transmute(vfcmulcph_512(
+ transmute(a),
+ transmute(b),
+ transmute(src),
+ k,
+ ROUNDING,
+ ))
+}
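+
+// Note (informal): a 512-bit vector holds 16 complex numbers, each spanning two `f16`
+// elements, so every bit of the `__mmask16` writemask selects one whole complex result.
+// A hypothetical call that keeps only the two lowest complex products and takes the
+// remaining lanes from `src`:
+//
+//     let r = _mm512_mask_cmul_pch(src, 0b0000_0000_0000_0011, a, b);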
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>(
+    k: __mmask16,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
+}
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the
+/// complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_cmul_sch(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
+/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
+/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_cmul_sch(_mm_setzero_ph(), k, a, b)
+}
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the
+/// complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_cmul_round_sch::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cmul_round_sch<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ transmute(vfcmulcsh(
+ transmute(a),
+ transmute(b),
+ transmute(src),
+ k,
+ ROUNDING,
+ ))
+}
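+
+// Usage sketch (not part of the intrinsic): like the non-conjugated variant, the
+// rounding mode is supplied as a compile-time const generic, e.g.
+//
+//     let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
+//
+// which multiplies the lowest complex number of `a` by the conjugate of the lowest
+// complex number of `b`, truncating the result and suppressing exceptions.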
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128h,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_cmul_round_sch::<ROUNDING>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h {
+ _mm_cmul_pch(a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_cmul_pch(src, k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_maskz_cmul_pch(k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h {
+ _mm256_cmul_pch(a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
+ _mm256_mask_cmul_pch(src, k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
+ _mm256_maskz_cmul_pch(k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h {
+ _mm512_cmul_pch(a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_cmul_pch(src, k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_maskz_cmul_pch(k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the
+/// complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_cmul_round_pch::<ROUNDING>(a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>(
+    src: __m512h,
+    k: __mmask16,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cmul_round_pch::<ROUNDING>(src, k, a, b)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
+/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>(
+    k: __mmask16,
+    a: __m512h,
+    b: __m512h,
+) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_maskz_cmul_round_pch::<ROUNDING>(k, a, b)
+}
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h {
+ _mm_cmul_sch(a, b)
+}
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_cmul_sch(src, k, a, b)
+}
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_maskz_cmul_sch(k, a, b)
+}
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+    _mm_cmul_round_sch::<ROUNDING>(a, b)
+}
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+    _mm_mask_cmul_round_sch::<ROUNDING>(src, k, a, b)
+}
+
+/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
+/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+    _mm_maskz_cmul_round_sch::<ROUNDING>(k, a, b)
+}
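+
+// A minimal scalar sketch of the conjugate-multiply semantics described in the docs
+// above, written with `f32` pairs purely for illustration (the intrinsics operate on
+// packed `f16` lanes). Given `a = (ar, ai)` and `b = (br, bi)`, `fcmul` computes
+// `a * conj(b)`:
+//
+//     fn fcmul_scalar(a: (f32, f32), b: (f32, f32)) -> (f32, f32) {
+//         // (ar + i*ai) * (br - i*bi) = (ar*br + ai*bi) + i*(ai*br - ar*bi)
+//         (a.0 * b.0 + a.1 * b.1, a.1 * b.0 - a.0 * b.1)
+//     }
+//
+// The `*_round_*` variants take the rounding mode as a const generic, e.g.
+// `_mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)`.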
+
+#[allow(improper_ctypes)]
+extern "C" {
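+    // Raw LLVM intrinsics backing the wrappers above. The `mask.*` variants take the
+    // merge source `src` and the mask `k` directly, so masking is resolved inside the
+    // intrinsic; the complex-multiply intrinsics are declared with single-precision
+    // vector types because each (re, im) `f16` pair occupies one 32-bit lane.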
+ #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
+ fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
+ #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
+ fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
+
+ #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
+ fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
+ fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
+ fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
+ fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
+ fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
+ fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
+ fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
+ fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
+ fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
+ fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
+ fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
+ fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
+ fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
+ fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
+ fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
+ fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::x86::*;
+ use crate::mem::transmute;
+ use crate::ptr::{addr_of, addr_of_mut};
+ use stdarch_test::simd_test;
+
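+    // Helpers for the complex-number tests below: splat a single (re, im) pair across
+    // every two-element complex lane of the vector.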
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
+ _mm_setr_ph(re, im, re, im, re, im, re, im)
+ }
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
+ _mm256_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ )
+ }
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
+ _mm512_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ re, im, re, im, re, im, re, im, re, im,
+ )
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_ph() {
+ let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set_ph() {
+ let r = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_setr_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set_ph() {
+ let r = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_setr_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_sh() {
+ let r = _mm_set_sh(1.0);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set1_ph() {
+ let r = _mm_set1_ph(1.0);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set1_ph() {
+ let r = _mm256_set1_ph(1.0);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set1_ph() {
+ let r = _mm512_set1_ph(1.0);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_setr_ph() {
+ let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_setr_ph() {
+ let r = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setr_ph() {
+ let r = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
3.0, 2.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_setzero_ph() {
- let r = _mm_setzero_ph();
- let e = _mm_set1_ph(0.0);
+ unsafe fn test_mm_setzero_ph() {
+ let r = _mm_setzero_ph();
+ let e = _mm_set1_ph(0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_setzero_ph() {
+ let r = _mm256_setzero_ph();
+ let e = _mm256_set1_ph(0.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setzero_ph() {
+ let r = _mm512_setzero_ph();
+ let e = _mm512_set1_ph(0.0);
+ assert_eq_m512h(r, e);
+ }
+
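+    // The cast tests below reinterpret bits rather than convert values: 0x3c00 is 1.0 in
+    // IEEE 754 binary16, 0x3f800000 is 1.0 in binary32, and 0x3ff0000000000000 is 1.0 in
+    // binary64.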
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castsi128_ph() {
+ let a = _mm_set1_epi16(0x3c00);
+ let r = _mm_castsi128_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castsi256_ph() {
+ let a = _mm256_set1_epi16(0x3c00);
+ let r = _mm256_castsi256_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castsi512_ph() {
+ let a = _mm512_set1_epi16(0x3c00);
+ let r = _mm512_castsi512_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_si128() {
+ let a = _mm_set1_ph(1.0);
+ let r = _mm_castph_si128(a);
+ let e = _mm_set1_epi16(0x3c00);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_si256() {
+ let a = _mm256_set1_ph(1.0);
+ let r = _mm256_castph_si256(a);
+ let e = _mm256_set1_epi16(0x3c00);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_si512() {
+ let a = _mm512_set1_ph(1.0);
+ let r = _mm512_castph_si512(a);
+ let e = _mm512_set1_epi16(0x3c00);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castps_ph() {
+ let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
+ let r = _mm_castps_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castps_ph() {
+ let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castps_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castps_ph() {
+ let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castps_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_ps() {
+ let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
+ let r = _mm_castph_ps(a);
+ let e = _mm_set1_ps(1.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_ps() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
+ let r = _mm256_castph_ps(a);
+ let e = _mm256_set1_ps(1.0);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_ps() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
+ let r = _mm512_castph_ps(a);
+ let e = _mm512_set1_ps(1.0);
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castpd_ph() {
+ let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
+ let r = _mm_castpd_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castpd_ph() {
+ let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castpd_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castpd_ph() {
+ let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castpd_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_pd() {
+ let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
+ let r = _mm_castph_pd(a);
+ let e = _mm_set1_pd(1.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_pd() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
+ let r = _mm256_castph_pd(a);
+ let e = _mm256_set1_pd(1.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_pd() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
+ let r = _mm512_castph_pd(a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph256_ph128() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm256_castph256_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph128() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph256() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_castph128_ph256(a);
+ assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_castph128_ph512(a);
+ assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_castph256_ph512(a);
+ assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_zextph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_zextph128_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_zextph128_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_zextph256_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cmp_round_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cmp_round_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
+ assert_eq!(r, 0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
+ assert_eq!(r, 0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comige_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comile_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comilt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomige_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomile_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomilt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_load_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_load_ph(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_load_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_load_ph(addr_of!(a).cast());
+ assert_eq_m256h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_load_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_load_ph(addr_of!(a).cast());
+ assert_eq_m512h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_load_sh(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let src = _mm_set_sh(2.);
+ let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
+ assert_eq_m128h(src, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_loadu_ph() {
+ let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
+ let r = _mm_loadu_ph(array.as_ptr());
+ let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_setzero_ph() {
- let r = _mm256_setzero_ph();
- let e = _mm256_set1_ph(0.0);
+ unsafe fn test_mm256_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ ];
+ let r = _mm256_loadu_ph(array.as_ptr());
+ let e = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_setzero_ph() {
- let r = _mm512_setzero_ph();
- let e = _mm512_set1_ph(0.0);
+ unsafe fn test_mm512_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ ];
+ let r = _mm512_loadu_ph(array.as_ptr());
+ let e = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castsi128_ph() {
- let a = _mm_set1_epi16(0x3c00);
- let r = _mm_castsi128_ph(a);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_move_sh(a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castsi256_ph() {
- let a = _mm256_set1_epi16(0x3c00);
- let r = _mm256_castsi256_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_mask_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let src = _mm_set_sh(10.0);
+ let r = _mm_mask_move_sh(src, 0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castsi512_ph() {
- let a = _mm512_set1_epi16(0x3c00);
- let r = _mm512_castsi512_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_maskz_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_maskz_move_sh(0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_store_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_store_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let mut b = _mm256_setzero_ph();
+ _mm256_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m256h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_si128() {
- let a = _mm_set1_ph(1.0);
- let r = _mm_castph_si128(a);
- let e = _mm_set1_epi16(0x3c00);
- assert_eq_m128i(r, e);
+ unsafe fn test_mm512_store_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let mut b = _mm512_setzero_ph();
+ _mm512_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m512h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_si256() {
- let a = _mm256_set1_ph(1.0);
- let r = _mm256_castph_si256(a);
- let e = _mm256_set1_epi16(0x3c00);
- assert_eq_m256i(r, e);
+ unsafe fn test_mm_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_sh(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_si512() {
- let a = _mm512_set1_ph(1.0);
- let r = _mm512_castph_si512(a);
- let e = _mm512_set1_epi16(0x3c00);
- assert_eq_m512i(r, e);
+ unsafe fn test_mm_mask_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_storeu_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut array = [0.0; 8];
+ _mm_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_storeu_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let mut array = [0.0; 16];
+ _mm256_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castps_ph() {
- let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
- let r = _mm_castps_ph(a);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm512_storeu_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let mut array = [0.0; 32];
+ _mm512_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_add_ph(a, b);
+ let e = _mm_set1_ph(9.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castps_ph() {
- let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castps_ph(a);
- let e = _mm256_set1_ph(1.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_add_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_add_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
+ assert_eq_m128h(r, e);
+ }
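+
+    // In the masked arithmetic tests, mask bit i selects element i (the lowest-numbered
+    // element is the *last* argument of `_mm_set_ph`), so a mask like 0b01010101 keeps the
+    // computed result in the even-indexed elements and takes `src` (or zero) elsewhere.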
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_add_ph(a, b);
+ let e = _mm256_set1_ph(17.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castps_ph() {
- let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castps_ph(a);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_add_ph(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_ps() {
- let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
- let r = _mm_castph_ps(a);
- let e = _mm_set1_ps(1.0);
- assert_eq_m128(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_ps() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
- let r = _mm256_castph_ps(a);
- let e = _mm256_set1_ps(1.0);
- assert_eq_m256(r, e);
+ unsafe fn test_mm512_maskz_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_ps() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
- let r = _mm512_castph_ps(a);
- let e = _mm512_set1_ps(1.0);
- assert_eq_m512(r, e);
+ unsafe fn test_mm512_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castpd_ph() {
- let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
- let r = _mm_castpd_ph(a);
- let e = _mm_set1_ph(1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_mask_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castpd_ph() {
- let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castpd_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm512_maskz_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castpd_ph() {
- let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castpd_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_pd() {
- let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
- let r = _mm_castph_pd(a);
- let e = _mm_set1_pd(1.0);
- assert_eq_m128d(r, e);
+ unsafe fn test_mm_mask_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_pd() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
- let r = _mm256_castph_pd(a);
- let e = _mm256_set1_pd(1.0);
- assert_eq_m256d(r, e);
+ unsafe fn test_mm_maskz_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_pd() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
- let r = _mm512_castph_pd(a);
- let e = _mm512_set1_pd(1.0);
- assert_eq_m512d(r, e);
+ unsafe fn test_mm_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_sh(a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph256_ph128() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- let r = _mm256_castph256_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ unsafe fn test_mm_mask_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_sh(src, 1, a, b);
+ let e = _mm_set_sh(3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph128() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
- );
- let r = _mm512_castph512_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ unsafe fn test_mm_maskz_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_add_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_add_sh(1, a, b);
+ let e = _mm_set_sh(3.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph256() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
- );
- let r = _mm512_castph512_ph256(a);
- let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_sub_ph(a, b);
+ let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_castph128_ph256(a);
- assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_castph128_ph512(a);
- assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_sub_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let r = _mm512_castph256_ph512(a);
- assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_sub_ph(a, b);
+ let e = _mm256_set_ph(
+ -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
+ 15.0,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_zextph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_zextph128_ph256(a);
- let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
);
assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_zextph128_ph512(a);
- let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- assert_eq_m512h(r, e);
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ unsafe fn test_mm512_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- let r = _mm512_zextph256_ph512(a);
- let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_ph(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mask_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comieq_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comieq_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comige_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mask_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comigt_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comile_sh() {
+ unsafe fn test_mm_sub_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comile_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comilt_sh() {
+ unsafe fn test_mm_mask_sub_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comilt_sh(a, b);
- assert_eq!(r, 1);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comineq_sh() {
+ unsafe fn test_mm_maskz_sub_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comineq_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomieq_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomieq_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomige_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomigt_sh(a, b);
- assert_eq!(r, 1);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomile_sh() {
+ unsafe fn test_mm_sub_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomile_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_sub_sh(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomilt_sh() {
+ unsafe fn test_mm_mask_sub_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomilt_sh(a, b);
- assert_eq!(r, 1);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_sh(src, 1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomineq_sh() {
+ unsafe fn test_mm_maskz_sub_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomineq_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_maskz_sub_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_sub_sh(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_load_ph() {
+ unsafe fn test_mm_mul_ph() {
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_load_ph(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_mul_ph(a, b);
+ let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_load_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_load_ph(addr_of!(a).cast());
- assert_eq_m256h(a, b);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_load_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_load_ph(addr_of!(a).cast());
- assert_eq_m512h(a, b);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_load_sh(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ unsafe fn test_mm_mask_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
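+        // One mask bit per f16 lane: set bits take a * b, clear bits pass the matching
+        // src lane through (note that _mm_set_ph lists lane 7 first and lane 0 last).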
+ let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_load_sh() {
- let a = _mm_set_sh(1.0);
- let src = _mm_set_sh(2.);
- let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
- assert_eq_m128h(src, b);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_mul_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
- assert_eq_m128h(_mm_setzero_ph(), b);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_mul_ph(a, b);
+ let e = _mm256_set_ph(
+ 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
+ 30.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_loadu_ph() {
- let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
- let r = _mm_loadu_ph(array.as_ptr());
- let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm256_mask_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_loadu_ph() {
- let array = [
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- ];
- let r = _mm256_loadu_ph(array.as_ptr());
- let e = _mm256_setr_ph(
+ unsafe fn test_mm256_maskz_mul_ph() {
+ let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_loadu_ph() {
- let array = [
+ unsafe fn test_mm512_mul_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
- ];
- let r = _mm512_loadu_ph(array.as_ptr());
- let e = _mm512_setr_ph(
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_mul_ph(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_mul_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_move_sh(a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_maskz_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let src = _mm_set_sh(10.0);
- let r = _mm_mask_move_sh(src, 0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
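+        // _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest-even
+        // with floating-point exceptions suppressed, so the results match _mm512_mul_ph.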
+ let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_maskz_move_sh(0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_store_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut b = _mm_setzero_ph();
- _mm_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_store_ph() {
- let a = _mm256_set_ph(
+ unsafe fn test_mm512_mask_mul_round_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
);
- let mut b = _mm256_setzero_ph();
- _mm256_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m256h(a, b);
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_store_ph() {
+ unsafe fn test_mm512_maskz_mul_round_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
- let mut b = _mm512_setzero_ph();
- _mm512_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m512h(a, b);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_store_sh() {
+ unsafe fn test_mm_mul_round_sh() {
let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_store_sh(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_store_sh() {
+ unsafe fn test_mm_mask_mul_round_sh() {
let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
- assert_eq_m128h(_mm_setzero_ph(), b);
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
- assert_eq_m128h(a, b);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_storeu_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut array = [0.0; 8];
- _mm_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_storeu_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let mut array = [0.0; 16];
- _mm256_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_mul_sh(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_storeu_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let mut array = [0.0; 32];
- _mm512_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+ unsafe fn test_mm_mask_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_sh(src, 1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_mul_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_mul_sh(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_add_ph(a, b);
- let e = _mm_set1_ph(9.0);
+ unsafe fn test_mm_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_div_ph(a, b);
+ let e = _mm_set1_ph(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_add_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ unsafe fn test_mm_mask_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
+ let r = _mm_mask_div_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_add_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
+ unsafe fn test_mm_maskz_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
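+        // The maskz form zeroes every lane whose mask bit is clear instead of merging from src.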
+ let r = _mm_maskz_div_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_add_ph(a, b);
- let e = _mm256_set1_ph(17.0);
+ unsafe fn test_mm256_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_div_ph(a, b);
+ let e = _mm256_set1_ph(0.5);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
+ unsafe fn test_mm256_mask_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0,
);
- let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
+ let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
+ unsafe fn test_mm256_maskz_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_add_ph(a, b);
- let e = _mm512_set1_ph(33.0);
+ unsafe fn test_mm512_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_ph(a, b);
+ let e = _mm512_set1_ph(0.5);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
+ unsafe fn test_mm512_mask_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
);
- let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
+ let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_maskz_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(33.0);
+ unsafe fn test_mm512_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(0.5);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
+ unsafe fn test_mm512_mask_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
);
- let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_round_sh() {
+ unsafe fn test_mm_div_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_round_sh() {
+ unsafe fn test_mm_mask_div_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
let e = _mm_set_sh(4.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 1, a, b,
);
- let e = _mm_set_sh(3.0);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_round_sh() {
+ unsafe fn test_mm_maskz_div_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
let e = _mm_set_sh(0.0);
assert_eq_m128h(r, e);
let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(3.0);
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_sh() {
+ unsafe fn test_mm_div_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_add_sh(a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_div_sh(a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_sh() {
+ unsafe fn test_mm_mask_div_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_sh(src, 0, a, b);
+ let r = _mm_mask_div_sh(src, 0, a, b);
let e = _mm_set_sh(4.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_add_sh(src, 1, a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_mask_div_sh(src, 1, a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_sh() {
+ unsafe fn test_mm_maskz_div_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_maskz_add_sh(0, a, b);
+ let r = _mm_maskz_div_sh(0, a, b);
let e = _mm_set_sh(0.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_add_sh(1, a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_maskz_div_sh(1, a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_sub_ph(a, b);
- let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ unsafe fn test_mm_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
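+        // Each adjacent f16 pair holds one complex number (re, im); (0 + i) * (0 + i) = -1 + 0i.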
+ let r = _mm_mul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ unsafe fn test_mm_mask_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
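+        // One mask bit covers each complex pair (two f16 lanes); clear bits keep both src lanes.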
+ let r = _mm_mask_mul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_sub_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ unsafe fn test_mm_maskz_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_mul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_sub_ph(a, b);
- let e = _mm256_set_ph(
- -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
- 15.0,
- );
+ unsafe fn test_mm256_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_mul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ unsafe fn test_mm256_mask_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ unsafe fn test_mm256_maskz_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_mul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
- let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
- assert_eq_m256h(r, e);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ unsafe fn test_mm512_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b0101010101010101,
+ a,
+ b,
);
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
- let r = _mm512_sub_ph(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
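+        // Only the low complex pair (lanes 0-1) is multiplied; lanes 2-7 are copied from a.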
+ let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r =
+ _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_mul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_mul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_mul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
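+        // fmul_pch computes the same packed complex product as mul_pch here: i * i = -1.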
+ let r = _mm_fmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_fmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_fmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
+ let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ unsafe fn test_mm512_mask_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ unsafe fn test_mm512_maskz_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
- );
+ unsafe fn test_mm512_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(-1.0);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_maskz_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(-1.0);
+ _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_sh(a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_sub_sh(src, 1, a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_mask_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_sub_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_sub_sh(1, a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_maskz_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_fmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_mul_ph(a, b);
- let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ unsafe fn test_mm_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
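+        // cmul multiplies a by the complex conjugate of b: (0 + i) * conj(0 - i) = -1 + 0i.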
+ let r = _mm_cmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ unsafe fn test_mm_mask_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_mul_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ unsafe fn test_mm_maskz_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_cmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_mul_ph(a, b);
- let e = _mm256_set_ph(
- 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
- 30.0, 16.0,
- );
+ unsafe fn test_mm256_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_cmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ unsafe fn test_mm256_mask_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
- );
- let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ unsafe fn test_mm256_maskz_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_mul_ph(a, b);
- let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
- );
+ unsafe fn test_mm512_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ unsafe fn test_mm512_mask_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
- );
+ unsafe fn test_mm512_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_mask_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_maskz_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_cmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_sh(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_mul_sh(src, 1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_mask_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_mul_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_mul_sh(1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_maskz_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let r = _mm_div_ph(a, b);
- let e = _mm_set1_ph(0.5);
+ unsafe fn test_mm_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_fcmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
- let r = _mm_mask_div_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
+ unsafe fn test_mm_mask_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let r = _mm_maskz_div_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ unsafe fn test_mm_maskz_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_fcmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let r = _mm256_div_ph(a, b);
- let e = _mm256_set1_ph(0.5);
+ unsafe fn test_mm256_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_fcmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let src = _mm256_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0,
+ unsafe fn test_mm256_mask_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ unsafe fn test_mm256_maskz_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_ph(a, b);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
- );
- let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ unsafe fn test_mm512_mask_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ unsafe fn test_mm512_maskz_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
- );
- let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_mask_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fcmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_maskz_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_fcmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_sh(a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_div_sh(src, 1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_mask_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_div_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_div_sh(1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_maskz_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
}
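
[Reading aid, not part of the patch] The hunks above swap the plain `mul`/`div` tests for the masked half-precision complex conjugate multiply tests (`_mm_cmul_pch` / `_mm_fcmul_pch` and friends, i.e. `VFCMULCPH`/`VFCMULCSH`). A minimal scalar sketch of the per-element semantics the expected values assert, assuming the conjugate is applied to the second operand `b` and substituting `f32` lanes for the still-unstable `f16` type so it compiles on stable Rust:

    // Scalar reference for the conjugate complex multiply tested above:
    // r = a * conj(b) per complex pair, with per-element masking.
    // Lane layout mirrors `_mm_setr_ph`: even index = real, odd = imaginary.
    fn fcmul_pch_ref(
        a: &[f32; 8],
        b: &[f32; 8],
        src: &[f32; 8],
        mask: u8,
        zero_masked: bool,
    ) -> [f32; 8] {
        let mut r = [0.0f32; 8];
        for i in 0..4 {
            // One complex element spans two lanes; one mask bit per element.
            let (ar, ai) = (a[2 * i], a[2 * i + 1]);
            let (br, bi) = (b[2 * i], b[2 * i + 1]);
            if mask & (1 << i) != 0 {
                r[2 * i] = ar * br + ai * bi; // Re(a * conj(b))
                r[2 * i + 1] = ai * br - ar * bi; // Im(a * conj(b))
            } else if zero_masked {
                // maskz_* variants zero out inactive elements.
                r[2 * i] = 0.0;
                r[2 * i + 1] = 0.0;
            } else {
                // mask_* variants keep the corresponding element of `src`.
                r[2 * i] = src[2 * i];
                r[2 * i + 1] = src[2 * i + 1];
            }
        }
        r
    }

    fn main() {
        // i * conj(-i) = i * i = -1, matching `test_mm_maskz_fcmul_pch` above.
        let a = [0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0];
        let b = [0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0];
        let zeros = [0.0; 8];
        let r = fcmul_pch_ref(&a, &b, &zeros, 0b0101, true);
        assert_eq!(r, [-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0]);
    }

The same model extends to the 256-bit and 512-bit forms (8 and 16 complex elements per vector) and to the `_sch` scalar forms, where only element 0 is computed and the upper lanes are copied through from `a`.
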
From 0bec23b9ff5b680c0808bf3e6ba905cb5b099515 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Fri, 12 Jul 2024 12:39:31 +0530
Subject: [PATCH 04/11] AVX512FP16 Part 3: FMA
---
crates/core_arch/missing-x86.md | 188 -
crates/core_arch/src/x86/avx512fp16.rs | 9660 +++++++++++++++++++-----
2 files changed, 7894 insertions(+), 1954 deletions(-)
diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index c66e1e728c..08b3ab9a18 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -55,10 +55,8 @@
* [ ] [`_mm256_cvtsh_h`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h)
* [ ] [`_mm256_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pch)
- * [ ] [`_mm512_abs_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph)
* [ ] [`_mm512_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
* [ ] [`_mm512_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
- * [ ] [`_mm512_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch)
* [ ] [`_mm512_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph)
* [ ] [`_mm512_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph)
* [ ] [`_mm512_cvt_roundepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph)
@@ -104,47 +102,14 @@
* [ ] [`_mm512_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph)
* [ ] [`_mm512_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
* [ ] [`_mm512_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
- * [ ] [`_mm512_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch)
- * [ ] [`_mm512_fcmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch)
- * [ ] [`_mm512_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch)
- * [ ] [`_mm512_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph)
- * [ ] [`_mm512_fmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch)
- * [ ] [`_mm512_fmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ph)
- * [ ] [`_mm512_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ph)
- * [ ] [`_mm512_fmaddsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ph)
- * [ ] [`_mm512_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ph)
- * [ ] [`_mm512_fmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph)
- * [ ] [`_mm512_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph)
- * [ ] [`_mm512_fmsubadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph)
- * [ ] [`_mm512_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph)
- * [ ] [`_mm512_fnmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph)
- * [ ] [`_mm512_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph)
- * [ ] [`_mm512_fnmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ph)
* [ ] [`_mm512_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask)
* [ ] [`_mm512_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph)
* [ ] [`_mm512_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph)
* [ ] [`_mm512_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph)
* [ ] [`_mm512_getmant_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph)
- * [ ] [`_mm512_mask3_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch)
- * [ ] [`_mm512_mask3_fcmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch)
- * [ ] [`_mm512_mask3_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch)
- * [ ] [`_mm512_mask3_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ph)
- * [ ] [`_mm512_mask3_fmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pch)
- * [ ] [`_mm512_mask3_fmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ph)
- * [ ] [`_mm512_mask3_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ph)
- * [ ] [`_mm512_mask3_fmaddsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ph)
- * [ ] [`_mm512_mask3_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ph)
- * [ ] [`_mm512_mask3_fmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ph)
- * [ ] [`_mm512_mask3_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ph)
- * [ ] [`_mm512_mask3_fmsubadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ph)
- * [ ] [`_mm512_mask3_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ph)
- * [ ] [`_mm512_mask3_fnmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph)
- * [ ] [`_mm512_mask3_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph)
- * [ ] [`_mm512_mask3_fnmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph)
* [ ] [`_mm512_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
* [ ] [`_mm512_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
* [ ] [`_mm512_mask_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
- * [ ] [`_mm512_mask_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch)
* [ ] [`_mm512_mask_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph)
* [ ] [`_mm512_mask_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph)
* [ ] [`_mm512_mask_cvt_roundepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph)
@@ -189,22 +154,6 @@
* [ ] [`_mm512_mask_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph)
* [ ] [`_mm512_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
* [ ] [`_mm512_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
- * [ ] [`_mm512_mask_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch)
- * [ ] [`_mm512_mask_fcmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch)
- * [ ] [`_mm512_mask_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch)
- * [ ] [`_mm512_mask_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph)
- * [ ] [`_mm512_mask_fmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch)
- * [ ] [`_mm512_mask_fmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ph)
- * [ ] [`_mm512_mask_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ph)
- * [ ] [`_mm512_mask_fmaddsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ph)
- * [ ] [`_mm512_mask_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ph)
- * [ ] [`_mm512_mask_fmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph)
- * [ ] [`_mm512_mask_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph)
- * [ ] [`_mm512_mask_fmsubadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph)
- * [ ] [`_mm512_mask_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph)
- * [ ] [`_mm512_mask_fnmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph)
- * [ ] [`_mm512_mask_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph)
- * [ ] [`_mm512_mask_fnmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ph)
* [ ] [`_mm512_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask)
* [ ] [`_mm512_mask_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph)
* [ ] [`_mm512_mask_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph)
@@ -224,7 +173,6 @@
* [ ] [`_mm512_mask_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
* [ ] [`_mm512_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph)
* [ ] [`_mm512_mask_sqrt_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph)
- * [ ] [`_mm512_maskz_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch)
* [ ] [`_mm512_maskz_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
* [ ] [`_mm512_maskz_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
* [ ] [`_mm512_maskz_cvt_roundepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
@@ -269,22 +217,6 @@
* [ ] [`_mm512_maskz_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
* [ ] [`_mm512_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
* [ ] [`_mm512_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
- * [ ] [`_mm512_maskz_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch)
- * [ ] [`_mm512_maskz_fcmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch)
- * [ ] [`_mm512_maskz_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch)
- * [ ] [`_mm512_maskz_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph)
- * [ ] [`_mm512_maskz_fmadd_round_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch)
- * [ ] [`_mm512_maskz_fmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ph)
- * [ ] [`_mm512_maskz_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ph)
- * [ ] [`_mm512_maskz_fmaddsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ph)
- * [ ] [`_mm512_maskz_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ph)
- * [ ] [`_mm512_maskz_fmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph)
- * [ ] [`_mm512_maskz_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph)
- * [ ] [`_mm512_maskz_fmsubadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph)
- * [ ] [`_mm512_maskz_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph)
- * [ ] [`_mm512_maskz_fnmadd_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph)
- * [ ] [`_mm512_maskz_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph)
- * [ ] [`_mm512_maskz_fnmsub_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ph)
* [ ] [`_mm512_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph)
* [ ] [`_mm512_maskz_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph)
* [ ] [`_mm512_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph)
@@ -359,35 +291,11 @@
* [ ] [`_mm_cvttsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u64)
* [ ] [`_mm_cvtu32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
* [ ] [`_mm_cvtu64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh)
- * [ ] [`_mm_fcmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch)
- * [ ] [`_mm_fcmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch)
- * [ ] [`_mm_fmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch)
- * [ ] [`_mm_fmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh)
- * [ ] [`_mm_fmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch)
- * [ ] [`_mm_fmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh)
- * [ ] [`_mm_fmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh)
- * [ ] [`_mm_fmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh)
- * [ ] [`_mm_fnmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh)
- * [ ] [`_mm_fnmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh)
- * [ ] [`_mm_fnmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh)
- * [ ] [`_mm_fnmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sh)
* [ ] [`_mm_fpclass_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask)
* [ ] [`_mm_getexp_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh)
* [ ] [`_mm_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh)
* [ ] [`_mm_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh)
* [ ] [`_mm_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh)
- * [ ] [`_mm_mask3_fcmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch)
- * [ ] [`_mm_mask3_fcmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch)
- * [ ] [`_mm_mask3_fmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch)
- * [ ] [`_mm_mask3_fmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sh)
- * [ ] [`_mm_mask3_fmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sch)
- * [ ] [`_mm_mask3_fmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sh)
- * [ ] [`_mm_mask3_fmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sh)
- * [ ] [`_mm_mask3_fmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sh)
- * [ ] [`_mm_mask3_fnmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sh)
- * [ ] [`_mm_mask3_fnmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh)
- * [ ] [`_mm_mask3_fnmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh)
- * [ ] [`_mm_mask3_fnmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh)
* [ ] [`_mm_mask_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
* [ ] [`_mm_mask_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
* [ ] [`_mm_mask_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
@@ -396,18 +304,6 @@
* [ ] [`_mm_mask_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
* [ ] [`_mm_mask_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
* [ ] [`_mm_mask_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
- * [ ] [`_mm_mask_fcmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch)
- * [ ] [`_mm_mask_fcmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch)
- * [ ] [`_mm_mask_fmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch)
- * [ ] [`_mm_mask_fmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh)
- * [ ] [`_mm_mask_fmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch)
- * [ ] [`_mm_mask_fmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh)
- * [ ] [`_mm_mask_fmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh)
- * [ ] [`_mm_mask_fmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh)
- * [ ] [`_mm_mask_fnmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh)
- * [ ] [`_mm_mask_fnmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh)
- * [ ] [`_mm_mask_fnmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh)
- * [ ] [`_mm_mask_fnmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sh)
* [ ] [`_mm_mask_fpclass_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask)
* [ ] [`_mm_mask_getexp_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh)
* [ ] [`_mm_mask_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
@@ -431,18 +327,6 @@
* [ ] [`_mm_maskz_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
* [ ] [`_mm_maskz_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
* [ ] [`_mm_maskz_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
- * [ ] [`_mm_maskz_fcmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch)
- * [ ] [`_mm_maskz_fcmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch)
- * [ ] [`_mm_maskz_fmadd_round_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch)
- * [ ] [`_mm_maskz_fmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh)
- * [ ] [`_mm_maskz_fmadd_sch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch)
- * [ ] [`_mm_maskz_fmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh)
- * [ ] [`_mm_maskz_fmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh)
- * [ ] [`_mm_maskz_fmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh)
- * [ ] [`_mm_maskz_fnmadd_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh)
- * [ ] [`_mm_maskz_fnmadd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh)
- * [ ] [`_mm_maskz_fnmsub_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh)
- * [ ] [`_mm_maskz_fnmsub_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sh)
* [ ] [`_mm_maskz_getexp_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh)
* [ ] [`_mm_maskz_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
* [ ] [`_mm_maskz_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
@@ -473,9 +357,7 @@
["AVX512_FP16", "AVX512VL"]
- * [ ] [`_mm256_abs_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph)
* [ ] [`_mm256_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
- * [ ] [`_mm256_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch)
* [ ] [`_mm256_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph)
* [ ] [`_mm256_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph)
* [ ] [`_mm256_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph)
@@ -498,28 +380,11 @@
* [ ] [`_mm256_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64)
* [ ] [`_mm256_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
* [ ] [`_mm256_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
- * [ ] [`_mm256_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch)
- * [ ] [`_mm256_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch)
- * [ ] [`_mm256_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph)
- * [ ] [`_mm256_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph)
- * [ ] [`_mm256_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph)
- * [ ] [`_mm256_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph)
- * [ ] [`_mm256_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph)
- * [ ] [`_mm256_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph)
* [ ] [`_mm256_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
* [ ] [`_mm256_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph)
* [ ] [`_mm256_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph)
- * [ ] [`_mm256_mask3_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch)
- * [ ] [`_mm256_mask3_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch)
- * [ ] [`_mm256_mask3_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph)
- * [ ] [`_mm256_mask3_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmaddsub_ph)
- * [ ] [`_mm256_mask3_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsub_ph)
- * [ ] [`_mm256_mask3_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph)
- * [ ] [`_mm256_mask3_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph)
- * [ ] [`_mm256_mask3_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph)
* [ ] [`_mm256_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
* [ ] [`_mm256_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
- * [ ] [`_mm256_mask_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch)
* [ ] [`_mm256_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
* [ ] [`_mm256_mask_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph)
* [ ] [`_mm256_mask_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph)
@@ -542,14 +407,6 @@
* [ ] [`_mm256_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64)
* [ ] [`_mm256_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
* [ ] [`_mm256_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
- * [ ] [`_mm256_mask_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch)
- * [ ] [`_mm256_mask_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch)
- * [ ] [`_mm256_mask_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph)
- * [ ] [`_mm256_mask_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph)
- * [ ] [`_mm256_mask_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph)
- * [ ] [`_mm256_mask_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph)
- * [ ] [`_mm256_mask_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph)
- * [ ] [`_mm256_mask_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph)
* [ ] [`_mm256_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
* [ ] [`_mm256_mask_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph)
* [ ] [`_mm256_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
@@ -561,7 +418,6 @@
* [ ] [`_mm256_mask_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph)
* [ ] [`_mm256_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
* [ ] [`_mm256_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph)
- * [ ] [`_mm256_maskz_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch)
* [ ] [`_mm256_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
* [ ] [`_mm256_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
* [ ] [`_mm256_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
@@ -584,14 +440,6 @@
* [ ] [`_mm256_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
* [ ] [`_mm256_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
* [ ] [`_mm256_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
- * [ ] [`_mm256_maskz_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch)
- * [ ] [`_mm256_maskz_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch)
- * [ ] [`_mm256_maskz_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph)
- * [ ] [`_mm256_maskz_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph)
- * [ ] [`_mm256_maskz_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph)
- * [ ] [`_mm256_maskz_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph)
- * [ ] [`_mm256_maskz_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph)
- * [ ] [`_mm256_maskz_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph)
* [ ] [`_mm256_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
* [ ] [`_mm256_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
* [ ] [`_mm256_maskz_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph)
@@ -616,9 +464,7 @@
* [ ] [`_mm256_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph)
* [ ] [`_mm256_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
* [ ] [`_mm256_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph)
- * [ ] [`_mm_abs_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph)
* [ ] [`_mm_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
- * [ ] [`_mm_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch)
* [ ] [`_mm_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
* [ ] [`_mm_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
* [ ] [`_mm_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph)
@@ -641,28 +487,11 @@
* [ ] [`_mm_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64)
* [ ] [`_mm_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
* [ ] [`_mm_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
- * [ ] [`_mm_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch)
- * [ ] [`_mm_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch)
- * [ ] [`_mm_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph)
- * [ ] [`_mm_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph)
- * [ ] [`_mm_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph)
- * [ ] [`_mm_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph)
- * [ ] [`_mm_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph)
- * [ ] [`_mm_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph)
* [ ] [`_mm_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
* [ ] [`_mm_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph)
* [ ] [`_mm_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph)
- * [ ] [`_mm_mask3_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch)
- * [ ] [`_mm_mask3_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch)
- * [ ] [`_mm_mask3_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph)
- * [ ] [`_mm_mask3_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmaddsub_ph)
- * [ ] [`_mm_mask3_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ph)
- * [ ] [`_mm_mask3_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph)
- * [ ] [`_mm_mask3_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph)
- * [ ] [`_mm_mask3_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph)
* [ ] [`_mm_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
* [ ] [`_mm_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
- * [ ] [`_mm_mask_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch)
* [ ] [`_mm_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
* [ ] [`_mm_mask_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph)
* [ ] [`_mm_mask_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph)
@@ -685,14 +514,6 @@
* [ ] [`_mm_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64)
* [ ] [`_mm_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
* [ ] [`_mm_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
- * [ ] [`_mm_mask_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch)
- * [ ] [`_mm_mask_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch)
- * [ ] [`_mm_mask_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph)
- * [ ] [`_mm_mask_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph)
- * [ ] [`_mm_mask_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph)
- * [ ] [`_mm_mask_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph)
- * [ ] [`_mm_mask_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph)
- * [ ] [`_mm_mask_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph)
* [ ] [`_mm_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
* [ ] [`_mm_mask_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph)
* [ ] [`_mm_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph)
@@ -708,7 +529,6 @@
* [ ] [`_mm_mask_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph)
* [ ] [`_mm_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
* [ ] [`_mm_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph)
- * [ ] [`_mm_maskz_conj_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch)
* [ ] [`_mm_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
* [ ] [`_mm_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
* [ ] [`_mm_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
@@ -731,14 +551,6 @@
* [ ] [`_mm_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
* [ ] [`_mm_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
* [ ] [`_mm_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
- * [ ] [`_mm_maskz_fcmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch)
- * [ ] [`_mm_maskz_fmadd_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch)
- * [ ] [`_mm_maskz_fmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph)
- * [ ] [`_mm_maskz_fmaddsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph)
- * [ ] [`_mm_maskz_fmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph)
- * [ ] [`_mm_maskz_fmsubadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph)
- * [ ] [`_mm_maskz_fnmadd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph)
- * [ ] [`_mm_maskz_fnmsub_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph)
* [ ] [`_mm_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
* [ ] [`_mm_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph)
* [ ] [`_mm_maskz_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph)
diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs
index a2a31d87e9..11e5f7d8e9 100644
--- a/crates/core_arch/src/x86/avx512fp16.rs
+++ b/crates/core_arch/src/x86/avx512fp16.rs
@@ -2304,7 +2304,7 @@ pub unsafe fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h {
}
/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
-/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch)
@@ -2317,7 +2317,7 @@ pub unsafe fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h
}
/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
-/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch)
@@ -2343,7 +2343,7 @@ pub unsafe fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h {
}
/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
-/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch)
@@ -2356,7 +2356,7 @@ pub unsafe fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m2
}
/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
-/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch)
@@ -2382,7 +2382,7 @@ pub unsafe fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h {
}
/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
-/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch)
@@ -2395,7 +2395,7 @@ pub unsafe fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m
}
/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
-/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch)
@@ -2431,7 +2431,7 @@ pub unsafe fn _mm512_mul_round_pch(a: __m512h, b: __m512h)
}
/// Multiply the packed complex numbers in a and b, and store the results in dst using writemask k (the element
-/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
@@ -2465,7 +2465,7 @@ pub unsafe fn _mm512_mask_mul_round_pch(
}
/// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element
-/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
@@ -2634,7 +2634,7 @@ pub unsafe fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h {
}
/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
-/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch)
@@ -2647,7 +2647,7 @@ pub unsafe fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128
}
/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
-/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch)
@@ -2673,7 +2673,7 @@ pub unsafe fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h {
}
/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
-/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch)
@@ -2686,7 +2686,7 @@ pub unsafe fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m
}
/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
-/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch)
@@ -2711,7 +2711,7 @@ pub unsafe fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h {
}
/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
-/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch)
@@ -2724,7 +2724,7 @@ pub unsafe fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __
}
/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
-/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch)
@@ -2758,7 +2758,7 @@ pub unsafe fn _mm512_fmul_round_pch(a: __m512h, b: __m512h)
}
/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
-/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// is copied from src when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
/// Rounding is done according to the rounding parameter, which can be one of:
///
@@ -2785,7 +2785,7 @@ pub unsafe fn _mm512_mask_fmul_round_pch(
}
/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
-/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
+/// is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
/// Rounding is done according to the rounding parameter, which can be one of:
///
@@ -2941,7 +2941,7 @@ pub unsafe fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h {
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// store the results in dst using writemask k (the element is copied from src when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -2955,7 +2955,7 @@ pub unsafe fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// store the results in dst using zeromask k (the element is zeroed out when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -2983,7 +2983,7 @@ pub unsafe fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h {
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// store the results in dst using writemask k (the element is copied from src when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -2997,7 +2997,7 @@ pub unsafe fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// store the results in dst using zeromask k (the element is zeroed out when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3025,7 +3025,7 @@ pub unsafe fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h {
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// store the results in dst using writemask k (the element is copied from src when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3039,7 +3039,7 @@ pub unsafe fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// store the results in dst using zeromask k (the element is zeroed out when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3077,7 +3077,7 @@ pub unsafe fn _mm512_cmul_round_pch(a: __m512h, b: __m512h)
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// store the results in dst using writemask k (the element is copied from src when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3112,7 +3112,7 @@ pub unsafe fn _mm512_mask_cmul_round_pch(
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// store the results in dst using zeromask k (the element is zeroed out when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3281,7 +3281,7 @@ pub unsafe fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h {
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// store the results in dst using writemask k (the element is copied from src when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3295,7 +3295,7 @@ pub unsafe fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m12
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// store the results in dst using zeromask k (the element is zeroed out when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3323,7 +3323,7 @@ pub unsafe fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h {
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// store the results in dst using writemask k (the element is copied from src when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3337,7 +3337,7 @@ pub unsafe fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// store the results in dst using zeromask k (the element is zeroed out when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3365,7 +3365,7 @@ pub unsafe fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h {
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// store the results in dst using writemask k (the element is copied from src when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3379,7 +3379,7 @@ pub unsafe fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: _
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// store the results in dst using zeromask k (the element is zeroed out when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3416,7 +3416,7 @@ pub unsafe fn _mm512_fcmul_round_pch(a: __m512h, b: __m512h
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
+/// store the results in dst using writemask k (the element is copied from src when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3445,7 +3445,7 @@ pub unsafe fn _mm512_mask_fcmul_round_pch(
}
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
-/// store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
+/// store the results in dst using zeromask k (the element is zeroed out when the corresponding mask bit is not set).
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
@@ -3594,2682 +3594,8810 @@ pub unsafe fn _mm_maskz_fcmul_round_sch(
 _mm_maskz_cmul_round_sch::<ROUNDING>(k, a, b)
}
-#[allow(improper_ctypes)]
-extern "C" {
- #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
- fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
- #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
- fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
+/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_abs_ph(v2: __m128h) -> __m128h {
+ transmute(_mm_and_si128(transmute(v2), _mm_set1_epi16(i16::MAX)))
+}
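
The `_mm*_abs_ph` helpers above work by clearing the IEEE 754 sign bit (bit 15) of every `f16` lane via an AND with `i16::MAX`. A minimal standalone sketch of the same trick on a raw binary16 bit pattern (plain `u16` bits, purely illustrative):

```rust
// Absolute value of an IEEE 754 binary16 value, computed on its bit pattern:
// clearing bit 15 (the sign bit) is what the `_mm*_abs_ph` helpers above do
// lane-wise with an AND against `i16::MAX` (0x7FFF).
fn abs_f16_bits(bits: u16) -> u16 {
    bits & 0x7FFF
}

fn main() {
    let neg_1_5 = 0xBE00_u16; // -1.5 in binary16
    let pos_1_5 = 0x3E00_u16; // +1.5 in binary16
    assert_eq!(abs_f16_bits(neg_1_5), pos_1_5);
    assert_eq!(abs_f16_bits(pos_1_5), pos_1_5); // already non-negative
}
```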
- #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
- fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
- fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
- fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
- fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_abs_ph(v2: __m256h) -> __m256h {
+ transmute(_mm256_and_si256(transmute(v2), _mm256_set1_epi16(i16::MAX)))
+}
- #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
- fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
- fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
- fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
- fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_abs_ph(v2: __m512h) -> __m512h {
+ transmute(_mm512_and_si512(transmute(v2), _mm512_set1_epi16(i16::MAX)))
+}
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
- fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
- fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
- fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
- fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex
+/// number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines
+/// the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
+/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_conj_pch(a: __m128h) -> __m128h {
+ transmute(_mm_xor_si128(transmute(a), _mm_set1_epi32(i32::MIN)))
+}
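
The `conj` helpers flip only the sign of the imaginary half of each `(re, im)` pair, which is why the XOR mask above sets just bit 31 of every 32-bit lane (the sign bit of the second `f16`). A small sketch of the same arithmetic on a single complex value, using `f32` pairs for readability; the layout assumption follows `complex = vec.fp16[0] + i * vec.fp16[1]`:

```rust
// Complex conjugation negates the imaginary component and leaves the real
// component untouched; `_mm*_conj_pch` does this per pair by flipping the
// sign bit of the imaginary half.
fn conj(z: [f32; 2]) -> [f32; 2] {
    [z[0], -z[1]] // [re, im] -> [re, -im]
}

fn main() {
    assert_eq!(conj([3.0, 4.0]), [3.0, -4.0]);
}
```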
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
- fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
- fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
- fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
- fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
+/// (the element is copied from src when the corresponding mask bit is not set). Each complex number is composed of two
+/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number
+/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
+ let r: __m128 = transmute(_mm_conj_pch(a));
+ transmute(simd_select_bitmask(k, r, transmute(src)))
+}
+/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h {
+ _mm_mask_conj_pch(_mm_setzero_ph(), k, a)
}
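
All masked variants in this file share the same per-element selection rule: a writemask keeps the `src` element where the mask bit is clear, a zeromask writes zero there, and for the `_pch` intrinsics one mask bit covers one complex pair. A minimal sketch of those two rules on plain slices (illustrative scalar code, not SIMD):

```rust
// Writemask: result[i] = op[i] if mask bit i is set, else src[i].
fn mask_select(src: &[f32], op: &[f32], k: u8) -> Vec<f32> {
    op.iter()
        .zip(src)
        .enumerate()
        .map(|(i, (&o, &s))| if k & (1 << i) != 0 { o } else { s })
        .collect()
}

// Zeromask: result[i] = op[i] if mask bit i is set, else 0.0.
fn maskz_select(op: &[f32], k: u8) -> Vec<f32> {
    op.iter()
        .enumerate()
        .map(|(i, &o)| if k & (1 << i) != 0 { o } else { 0.0 })
        .collect()
}

fn main() {
    let src = [9.0, 9.0, 9.0, 9.0];
    let op = [1.0, 2.0, 3.0, 4.0];
    assert_eq!(mask_select(&src, &op, 0b0101), vec![1.0, 9.0, 3.0, 9.0]);
    assert_eq!(maskz_select(&op, 0b0101), vec![1.0, 0.0, 3.0, 0.0]);
}
```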
-#[cfg(test)]
-mod tests {
- use crate::core_arch::x86::*;
- use crate::mem::transmute;
- use crate::ptr::{addr_of, addr_of_mut};
- use stdarch_test::simd_test;
+/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
+/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_conj_pch(a: __m256h) -> __m256h {
+ transmute(_mm256_xor_si256(transmute(a), _mm256_set1_epi32(i32::MIN)))
+}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
- _mm_setr_ph(re, im, re, im, re, im, re, im)
- }
+/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
+/// (the element is copied from src when the corresponding mask bit is not set). Each complex number is composed of two
+/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h {
+ let r: __m256 = transmute(_mm256_conj_pch(a));
+ transmute(simd_select_bitmask(k, r, transmute(src)))
+}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
- _mm256_setr_ph(
- re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
- )
- }
+/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h {
+ _mm256_mask_conj_pch(_mm256_setzero_ph(), k, a)
+}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
- _mm512_setr_ph(
- re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
- re, im, re, im, re, im, re, im, re, im,
- )
- }
+/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
+/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_conj_pch(a: __m512h) -> __m512h {
+ transmute(_mm512_xor_si512(transmute(a), _mm512_set1_epi32(i32::MIN)))
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_ph() {
- let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- assert_eq_m128h(r, e);
- }
+/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
+/// (the element is copied from src when the corresponding mask bit is not set). Each complex number is composed of two
+/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h {
+ let r: __m512 = transmute(_mm512_conj_pch(a));
+ transmute(simd_select_bitmask(k, r, transmute(src)))
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set_ph() {
- let r = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let e = _mm256_setr_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- assert_eq_m256h(r, e);
- }
+/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h {
+ _mm512_mask_conj_pch(_mm512_setzero_ph(), k, a)
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set_ph() {
- let r = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let e = _mm512_setr_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- assert_eq_m512h(r, e);
- }
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ _mm_mask3_fmadd_pch(a, b, c, 0xff)
+}
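
Per complex pair, the `fmadd_pch` family computes `a * b + c` with the usual complex-multiplication rule. A scalar sketch of that arithmetic on `(re, im)` tuples, using `f32` for readability (the intrinsics operate on adjacent `f16` pairs):

```rust
// (ar + i*ai) * (br + i*bi) + (cr + i*ci)
//   real: ar*br - ai*bi + cr
//   imag: ar*bi + ai*br + ci
fn fmadd_complex(a: (f32, f32), b: (f32, f32), c: (f32, f32)) -> (f32, f32) {
    (
        a.0 * b.0 - a.1 * b.1 + c.0,
        a.0 * b.1 + a.1 * b.0 + c.1,
    )
}

fn main() {
    // (1 + 2i) * (3 + 4i) + (5 + 6i) = (-5 + 10i) + (5 + 6i) = 0 + 16i
    assert_eq!(fmadd_complex((1.0, 2.0), (3.0, 4.0), (5.0, 6.0)), (0.0, 16.0));
}
```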
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_sh() {
- let r = _mm_set_sh(1.0);
- let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
- assert_eq_m128h(r, e);
- }
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using writemask k (the element is copied from a when the corresponding
+/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ let r: __m128 = transmute(_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
+ transmute(simd_select_bitmask(k, r, transmute(a)))
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set1_ph() {
- let r = _mm_set1_ph(1.0);
- let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
- assert_eq_m128h(r, e);
- }
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using writemask k (the element is copied from c when the corresponding
+/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ transmute(vfmaddcph_mask3_128(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ))
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set1_ph() {
- let r = _mm256_set1_ph(1.0);
- let e = _mm256_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- );
- assert_eq_m256h(r, e);
- }
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
+/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
+/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ transmute(vfmaddcph_maskz_128(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ))
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set1_ph() {
- let r = _mm512_set1_ph(1.0);
- let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- );
- assert_eq_m512h(r, e);
- }
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ _mm256_mask3_fmadd_pch(a, b, c, 0xff)
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_setr_ph() {
- let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- assert_eq_m128h(r, e);
- }
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
+/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
+/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
+ let r: __m256 = transmute(_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
+ transmute(simd_select_bitmask(k, r, transmute(a)))
+}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_setr_ph() {
- let r = _mm256_setr_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let e = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- assert_eq_m256h(r, e);
- }
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using writemask k (the element is copied from c when the corresponding
+/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
+ transmute(vfmaddcph_mask3_256(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ))
+}
+
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
+/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
+/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ transmute(vfmaddcph_maskz_256(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ))
+}
+
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ _mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
+}
+
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
+/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
+/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
+ _mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
+}
+
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using writemask k (the element is copied from c when the corresponding
+/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
+ _mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
+}
+
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
+/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
+/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ _mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
+}
+
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmadd_round_pch<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ _mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
+}
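
The `ROUNDING = 8` expected by the `assert_instr(vfmaddcph, ROUNDING = 8)` attributes above is simply round-to-nearest with exceptions suppressed. A small sketch of how that const argument is composed from the existing SSE rounding constants (x86_64 only):

```rust
// The rounding argument passed to the *_round_* intrinsics is an OR of the
// _MM_FROUND_* constants already exposed by core::arch.
#[cfg(target_arch = "x86_64")]
fn main() {
    use core::arch::x86_64::{_MM_FROUND_NO_EXC, _MM_FROUND_TO_NEAREST_INT};

    let rounding = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    assert_eq!(rounding, 8); // matches ROUNDING = 8 in the assert_instr checks
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```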
+
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
+/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
+/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmadd_round_pch<const ROUNDING: i32>(
+ a: __m512h,
+ k: __mmask16,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ let r: __m512 = transmute(_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
+ transmute(simd_select_bitmask(k, r, transmute(a)))
+}
+
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using writemask k (the element is copied from c when the corresponding
+/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fmadd_round_pch<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+ k: __mmask16,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ transmute(vfmaddcph_mask3_512(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ROUNDING,
+ ))
+}
+
+/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
+/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
+/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
+/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmadd_round_pch<const ROUNDING: i32>(
+ k: __mmask16,
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ transmute(vfmaddcph_maskz_512(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ROUNDING,
+ ))
+}
+
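+// Usage sketch (illustrative only; assumes a CPU with AVX512FP16 and the unstable `f16`
+// and `stdarch_x86_avx512_f16` features): the rounding mode is a const generic, so
+// callers combine the `_MM_FROUND_*` flags at compile time. The wrapper name below is
+// hypothetical.
+#[allow(dead_code)]
+#[target_feature(enable = "avx512fp16")]
+unsafe fn fmadd_pch_round_to_zero(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+    // Truncate (round toward zero) and suppress exceptions for this one operation.
+    _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
+}
+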
+/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
+/// store the result in the lower elements of dst, and copy the upper 6 packed elements from a to the
+/// upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ _mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
+}
+
+/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
+/// store the result in the lower elements of dst using writemask k (elements are copied from a when
+/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
+/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ _mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
+}
+
+/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
+/// store the result in the lower elements of dst using writemask k (elements are copied from c when
+/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
+/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ _mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
+}
+
+/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
+/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
+/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
+/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ _mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
+}
+
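+// Illustrative sketch (not part of the intrinsic API): the unmasked `_mm_fmadd_sch` form
+// above operates on a single complex number (two `f16` lanes) and passes the remaining
+// six lanes of `a` through unchanged. Modeled here over an 8-element `f32` array with a
+// hypothetical helper name.
+#[allow(dead_code)]
+fn fmadd_sch_model(a: [f32; 8], b: [f32; 8], c: [f32; 8]) -> [f32; 8] {
+    let mut dst = a; // upper 6 elements are copied from `a`
+    dst[0] = a[0] * b[0] - a[1] * b[1] + c[0]; // real part of the lower complex number
+    dst[1] = a[0] * b[1] + a[1] * b[0] + c[1]; // imaginary part
+    dst
+}
+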
+/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
+/// store the result in the lower elements of dst. Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmadd_round_sch<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ transmute(vfmaddcsh_mask(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ 0xff,
+ ROUNDING,
+ ))
+}
+
+/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
+/// store the result in the lower elements of dst using writemask k (elements are copied from a when
+/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
+/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmadd_round_sch<const ROUNDING: i32>(
+ a: __m128h,
+ k: __mmask8,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let a = transmute(a);
+ let r = vfmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); // using `0xff` would have been fine here, but this is what Clang does
+ transmute(_mm_mask_move_ss(a, k, a, r))
+}
+
+/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
+/// store the result in the lower elements of dst using writemask k (elements are copied from c when
+/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
+/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
+/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fmadd_round_sch<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+ k: __mmask8,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let c = transmute(c);
+ let r = vfmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
+ transmute(_mm_move_ss(c, r))
+}
+
+/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
+/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
+/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
+/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
+/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmadd_round_sch<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let a = transmute(a);
+ let r = vfmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING);
+ transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, then LLVM optimizes `vfmaddcsh_maskz` to output an all-zero vector, which is incorrect
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
+/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
+/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ _mm_mask3_fcmadd_pch(a, b, c, 0xff)
+}
+
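+// Illustrative sketch (not part of the intrinsic API): the per-lane arithmetic of the
+// conjugate variants, `a * conj(b) + c`, again over `f32` pairs just to make the layout
+// explicit. The helper name and element type are assumptions for the example only.
+#[allow(dead_code)]
+fn complex_fcmadd_lane(a: [f32; 2], b: [f32; 2], c: [f32; 2]) -> [f32; 2] {
+    // (a.re + i*a.im) * (b.re - i*b.im) + (c.re + i*c.im)
+    [
+        a[0] * b[0] + a[1] * b[1] + c[0], // real part
+        a[1] * b[0] - a[0] * b[1] + c[1], // imaginary part
+    ]
+}
+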
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ let r: __m128 = transmute(_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
+ transmute(simd_select_bitmask(k, r, transmute(a)))
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ transmute(vfcmaddcph_mask3_128(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ))
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ transmute(vfcmaddcph_maskz_128(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ))
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
+/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
+/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ _mm256_mask3_fcmadd_pch(a, b, c, 0xff)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
+ let r: __m256 = transmute(_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
+ transmute(simd_select_bitmask(k, r, transmute(a)))
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
+ transmute(vfcmaddcph_mask3_256(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ))
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ transmute(vfcmaddcph_maskz_256(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ))
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
+/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
+/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ _mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
+ _mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
+ _mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ _mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
+/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
+/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ _mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>(
+ a: __m512h,
+ k: __mmask16,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ let r: __m512 = transmute(_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
+ transmute(simd_select_bitmask(k, r, transmute(a)))
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c using writemask k (the element is copied from c when the corresponding
+/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
+/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+ k: __mmask16,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ transmute(vfcmaddcph_mask3_512(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ROUNDING,
+ ))
+}
+
+/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
+/// to the corresponding complex numbers in c using zeromask k (the element is zeroed out when the corresponding
+/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
+/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
+/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>(
+ k: __mmask16,
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ transmute(vfcmaddcph_maskz_512(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ k,
+ ROUNDING,
+ ))
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
+/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ _mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
+/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
+/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ _mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
+/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
+/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ _mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
+/// zeromask k (the element is zeroed out when the corresponding mask bit is not set), and copy the upper
+/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ _mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
+/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
+/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
+/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fcmadd_round_sch<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ transmute(vfcmaddcsh_mask(
+ transmute(a),
+ transmute(b),
+ transmute(c),
+ 0xff,
+ ROUNDING,
+ ))
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
+/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
+/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>(
+ a: __m128h,
+ k: __mmask8,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let a = transmute(a);
+ let r = vfcmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING);
+ transmute(_mm_mask_move_ss(a, k, a, r))
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
+/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
+/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
+/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
+/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+ k: __mmask8,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let c = transmute(c);
+ let r = vfcmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
+ transmute(_mm_move_ss(c, r))
+}
+
+/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
+/// accumulate to the lower complex number in c using zeromask k (the element is zeroed out when the corresponding
+/// mask bit is not set), and store the result in the lower elements of dst, and copy the upper 6 packed elements
+/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
+/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
+/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let a = transmute(a);
+ let r = vfcmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING);
+ transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, then LLVM optimizes `vfcmaddcsh_maskz` to output an all-zero vector, which is incorrect
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ simd_fma(a, b, c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), _mm_setzero_ph())
+}
+
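+// Mask-semantics sketch (illustrative only): the masked forms above differ only in what
+// fills a lane whose mask bit is clear: `a` for `mask`, `c` for `mask3`, and zero for
+// `maskz`. Shown per lane over `f32` with a hypothetical helper name.
+#[allow(dead_code)]
+fn masked_fma_lane(bit_set: bool, a: f32, b: f32, c: f32, fallback: f32) -> f32 {
+    // `fallback` is `a`, `c`, or `0.0` depending on which masked form is modeled.
+    if bit_set { a.mul_add(b, c) } else { fallback }
+}
+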
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ simd_fma(a, b, c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), _mm256_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ simd_fma(a, b, c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), _mm512_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmadd_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vfmaddph_512(a, b, c, ROUNDING)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ k: __mmask32,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+ k: __mmask32,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
+/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(
+ k,
+ _mm512_fmadd_round_ph::<ROUNDING>(a, b, c),
+ _mm512_setzero_ph(),
+ )
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
+/// 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ let r = fmaf16(extracta, extractb, extractc);
+ simd_insert!(a, 0, r)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from a when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ let mut fmadd: f16 = simd_extract!(a, 0);
+ if k & 1 != 0 {
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fmadd = fmaf16(fmadd, extractb, extractc);
+ }
+ simd_insert!(a, 0, fmadd)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ let mut fmadd: f16 = simd_extract!(c, 0);
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ fmadd = fmaf16(extracta, extractb, fmadd);
+ }
+ simd_insert!(c, 0, fmadd)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ let mut fmadd: f16 = 0.0;
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fmadd = fmaf16(extracta, extractb, extractc);
+ }
+ simd_insert!(a, 0, fmadd)
+}
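+
+// Summary of the three masked scalar flavours above (illustration only): when bit 0
+// of `k` is clear, lane 0 of the result comes from `a` (`_mm_mask_fmadd_sh`), from
+// `c` (`_mm_mask3_fmadd_sh`), or is zeroed (`_mm_maskz_fmadd_sh`); when it is set,
+// all three compute `fma(a[0], b[0], c[0])`. A stand-in sketch of the zeroing
+// variant, again with `f32` in place of `f16`:
+//
+//     fn maskz_fmadd_sh_ref(k: u8, a: [f32; 8], b: [f32; 8], c: [f32; 8]) -> [f32; 8] {
+//         let mut dst = a;                    // upper lanes always come from `a`
+//         dst[0] = if k & 1 != 0 { a[0].mul_add(b[0], c[0]) } else { 0.0 };
+//         dst
+//     }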
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
+/// 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmadd_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ let r = vfmaddsh(extracta, extractb, extractc, ROUNDING);
+ simd_insert!(a, 0, r)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ k: __mmask8,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fmadd: f16 = simd_extract!(a, 0);
+ if k & 1 != 0 {
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fmadd = vfmaddsh(fmadd, extractb, extractc, ROUNDING);
+ }
+ simd_insert!(a, 0, fmadd)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+ k: __mmask8,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fmadd: f16 = simd_extract!(c, 0);
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ fmadd = vfmaddsh(extracta, extractb, fmadd, ROUNDING);
+ }
+ simd_insert!(c, 0, fmadd)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
+/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fmadd: f16 = 0.0;
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fmadd = vfmaddsh(extracta, extractb, extractc, ROUNDING);
+ }
+ simd_insert!(a, 0, fmadd)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ simd_fma(a, b, simd_neg(c))
+}
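+
+// Reference model (illustration only): `fmsub` is expressed above as an `fma` with
+// `c` negated, so each lane is `a * b - c` computed with a single rounding. A
+// stand-in sketch with `f32` elements:
+//
+//     fn fmsub_ph_ref(a: [f32; 8], b: [f32; 8], c: [f32; 8]) -> [f32; 8] {
+//         core::array::from_fn(|i| a[i].mul_add(b[i], -c[i]))
+//     }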
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), _mm_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ simd_fma(a, b, simd_neg(c))
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), _mm256_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ simd_fma(a, b, simd_neg(c))
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), _mm512_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmsub_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vfmaddph_512(a, b, simd_neg(c), ROUNDING)
+}
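+
+// Caller-side sketch (illustration only): the rounding mode is supplied as the
+// const generic, e.g.
+//
+//     let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+//
+// That combination evaluates to 8, which is why the `assert_instr` attributes on
+// the `_round_` intrinsics in this file test with `ROUNDING = 8`.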
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ k: __mmask32,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+ k: __mmask32,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmsub_round_ph<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(
+ k,
+ _mm512_fmsub_round_ph::<ROUNDING>(a, b, c),
+ _mm512_setzero_ph(),
+ )
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
+/// 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ let r = fmaf16(extracta, extractb, -extractc);
+ simd_insert!(a, 0, r)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
+/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ let mut fmsub: f16 = simd_extract!(a, 0);
+ if k & 1 != 0 {
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fmsub = fmaf16(fmsub, extractb, -extractc);
+ }
+ simd_insert!(a, 0, fmsub)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
+/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ let mut fmsub: f16 = simd_extract!(c, 0);
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ fmsub = fmaf16(extracta, extractb, -fmsub);
+ }
+ simd_insert!(c, 0, fmsub)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ let mut fmsub: f16 = 0.0;
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fmsub = fmaf16(extracta, extractb, -extractc);
+ }
+ simd_insert!(a, 0, fmsub)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
+/// 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmsub_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ let r = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
+ simd_insert!(a, 0, r)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
+/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmsub_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ k: __mmask8,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fmsub: f16 = simd_extract!(a, 0);
+ if k & 1 != 0 {
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fmsub = vfmaddsh(fmsub, extractb, -extractc, ROUNDING);
+ }
+ simd_insert!(a, 0, fmsub)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
+/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fmsub_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+ k: __mmask8,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fmsub: f16 = simd_extract!(c, 0);
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ fmsub = vfmaddsh(extracta, extractb, -fmsub, ROUNDING);
+ }
+ simd_insert!(c, 0, fmsub)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmsub_round_sh<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fmsub: f16 = 0.0;
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fmsub = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
+ }
+ simd_insert!(a, 0, fmsub)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ simd_fma(simd_neg(a), b, c)
+}
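+
+// Reference model (illustration only): negating `a` before the `fma` gives
+// `-(a * b) + c`, i.e. each lane is `c - a * b` with a single rounding. A stand-in
+// sketch with `f32` elements:
+//
+//     fn fnmadd_ph_ref(a: [f32; 8], b: [f32; 8], c: [f32; 8]) -> [f32; 8] {
+//         core::array::from_fn(|i| (-a[i]).mul_add(b[i], c[i]))
+//     }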
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), _mm_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ simd_fma(simd_neg(a), b, c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+ simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), _mm256_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ simd_fma(simd_neg(a), b, c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+ simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), _mm512_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fnmadd_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vfmaddph_512(simd_neg(a), b, c, ROUNDING)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from a when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fnmadd_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ k: __mmask32,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
+/// from c when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fnmadd_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+ k: __mmask32,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
+/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fnmadd_round_ph<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(
+ k,
+ _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c),
+ _mm512_setzero_ph(),
+ )
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ let r = fmaf16(-extracta, extractb, extractc);
+ simd_insert!(a, 0, r)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ let mut fnmadd: f16 = simd_extract!(a, 0);
+ if k & 1 != 0 {
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fnmadd = fmaf16(-fnmadd, extractb, extractc);
+ }
+ simd_insert!(a, 0, fnmadd)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
+/// elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ let mut fnmadd: f16 = simd_extract!(c, 0);
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ fnmadd = fmaf16(-extracta, extractb, fnmadd);
+ }
+ simd_insert!(c, 0, fnmadd)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ let mut fnmadd: f16 = 0.0;
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fnmadd = fmaf16(-extracta, extractb, extractc);
+ }
+ simd_insert!(a, 0, fnmadd)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fnmadd_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ let r = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
+ simd_insert!(a, 0, r)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ k: __mmask8,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fnmadd: f16 = simd_extract!(a, 0);
+ if k & 1 != 0 {
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fnmadd = vfmaddsh(-fnmadd, extractb, extractc, ROUNDING);
+ }
+ simd_insert!(a, 0, fnmadd)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
+/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+ k: __mmask8,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fnmadd: f16 = simd_extract!(c, 0);
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ fnmadd = vfmaddsh(-extracta, extractb, fnmadd, ROUNDING);
+ }
+ simd_insert!(c, 0, fnmadd)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
+/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fnmadd: f16 = 0.0;
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fnmadd = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
+ }
+ simd_insert!(a, 0, fnmadd)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ simd_fma(simd_neg(a), b, simd_neg(c))
+}
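+
+// Reference model (illustration only): with both `a` and `c` negated the result is
+// `-(a * b) - c` per lane, again with a single rounding. A stand-in sketch with
+// `f32` elements:
+//
+//     fn fnmsub_ph_ref(a: [f32; 8], b: [f32; 8], c: [f32; 8]) -> [f32; 8] {
+//         core::array::from_fn(|i| (-a[i]).mul_add(b[i], -c[i]))
+//     }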
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), _mm_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ simd_fma(simd_neg(a), b, simd_neg(c))
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+ simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), _mm256_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ simd_fma(simd_neg(a), b, simd_neg(c))
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+ simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), _mm512_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ph)
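+///
+/// A sketch of selecting a rounding mode through the const parameter, with made-up values
+/// (`ignore`d because `f16` and these intrinsics are still unstable):
+///
+/// ```ignore
+/// use core::arch::x86_64::*;
+/// unsafe {
+///     let a = _mm512_set1_ph(2.0);
+///     let b = _mm512_set1_ph(3.0);
+///     let c = _mm512_set1_ph(1.0);
+///     // Round to nearest and suppress exceptions; each element is -(2.0 * 3.0) - 1.0 == -7.0.
+///     let r = _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+/// }
+/// ```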
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vfmaddph_512(simd_neg(a), b, simd_neg(c), ROUNDING)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from a when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ k: __mmask32,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+    simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
+/// copied from c when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+ k: __mmask32,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+    simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
+/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(
+ k,
+        _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c),
+ _mm512_setzero_ph(),
+ )
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 7
+/// packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sh)
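+///
+/// A sketch with made-up values showing that only the lowest element is computed while the upper
+/// elements are copied from `a` (`ignore`d because `f16` and these intrinsics are still unstable):
+///
+/// ```ignore
+/// use core::arch::x86_64::*;
+/// unsafe {
+///     let a = _mm_set1_ph(2.0);
+///     let b = _mm_set1_ph(3.0);
+///     let c = _mm_set1_ph(1.0);
+///     let r = _mm_fnmsub_sh(a, b, c);
+///     // element 0 is -(2.0 * 3.0) - 1.0 == -7.0; elements 1..=7 are copied from a (2.0)
+/// }
+/// ```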
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ let r = fmaf16(-extracta, extractb, -extractc);
+ simd_insert!(a, 0, r)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the
+/// element is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ let mut fnmsub: f16 = simd_extract!(a, 0);
+ if k & 1 != 0 {
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fnmsub = fmaf16(-fnmsub, extractb, -extractc);
+ }
+ simd_insert!(a, 0, fnmsub)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the
+/// element is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ let mut fnmsub: f16 = simd_extract!(c, 0);
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ fnmsub = fmaf16(-extracta, extractb, -fnmsub);
+ }
+ simd_insert!(c, 0, fnmsub)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the
+/// element is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sh)
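+///
+/// A sketch of the zeromask behaviour with made-up values; when mask bit 0 is clear the lowest
+/// element becomes 0.0 (`ignore`d because `f16` and these intrinsics are still unstable):
+///
+/// ```ignore
+/// use core::arch::x86_64::*;
+/// unsafe {
+///     let a = _mm_set1_ph(2.0);
+///     let b = _mm_set1_ph(3.0);
+///     let c = _mm_set1_ph(1.0);
+///     let computed = _mm_maskz_fnmsub_sh(1, a, b, c); // element 0 is -7.0
+///     let zeroed = _mm_maskz_fnmsub_sh(0, a, b, c); // element 0 is 0.0
+///     // in both cases elements 1..=7 are copied from a (2.0)
+/// }
+/// ```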
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ let mut fnmsub: f16 = 0.0;
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fnmsub = fmaf16(-extracta, extractb, -extractc);
+ }
+ simd_insert!(a, 0, fnmsub)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 7
+/// packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fnmsub_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ let r = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
+ simd_insert!(a, 0, r)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the
+/// element is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ k: __mmask8,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fnmsub: f16 = simd_extract!(a, 0);
+ if k & 1 != 0 {
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fnmsub = vfmaddsh(-fnmsub, extractb, -extractc, ROUNDING);
+ }
+ simd_insert!(a, 0, fnmsub)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the
+/// element is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>(
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+ k: __mmask8,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fnmsub: f16 = simd_extract!(c, 0);
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ fnmsub = vfmaddsh(-extracta, extractb, -fnmsub, ROUNDING);
+ }
+ simd_insert!(c, 0, fnmsub)
+}
+
+/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
+/// in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the
+/// element is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+ c: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ let mut fnmsub: f16 = 0.0;
+ if k & 1 != 0 {
+ let extracta: f16 = simd_extract!(a, 0);
+ let extractb: f16 = simd_extract!(b, 0);
+ let extractc: f16 = simd_extract!(c, 0);
+ fnmsub = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
+ }
+ simd_insert!(a, 0, fnmsub)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph)
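+///
+/// A sketch of the add/subtract interleaving with made-up values; even-indexed elements subtract
+/// `c`, odd-indexed elements add it (`ignore`d because `f16` and these intrinsics are still unstable):
+///
+/// ```ignore
+/// use core::arch::x86_64::*;
+/// unsafe {
+///     let a = _mm_set1_ph(2.0);
+///     let b = _mm_set1_ph(3.0);
+///     let c = _mm_set1_ph(1.0);
+///     let r = _mm_fmaddsub_ph(a, b, c);
+///     // even elements: 2.0 * 3.0 - 1.0 == 5.0; odd elements: 2.0 * 3.0 + 1.0 == 7.0
+/// }
+/// ```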
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ vfmaddsubph_128(a, b, c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), _mm_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ vfmaddsubph_256(a, b, c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask3_fmaddsub_ph(
+ a: __m256h,
+ b: __m256h,
+ c: __m256h,
+ k: __mmask16,
+) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_fmaddsub_ph(
+ k: __mmask16,
+ a: __m256h,
+ b: __m256h,
+ c: __m256h,
+) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), _mm256_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ _mm512_fmaddsub_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fmaddsub_ph(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+ k: __mmask32,
+) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmaddsub_ph(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), _mm512_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vfmaddsubph_512(a, b, c, ROUNDING)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ k: __mmask32,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+    simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+ k: __mmask32,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+    simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
+/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(
+ k,
+        _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c),
+ _mm512_setzero_ph(),
+ )
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph)
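+///
+/// A sketch of the opposite interleaving with made-up values; even-indexed elements add `c`,
+/// odd-indexed elements subtract it (`ignore`d because `f16` and these intrinsics are still unstable):
+///
+/// ```ignore
+/// use core::arch::x86_64::*;
+/// unsafe {
+///     let a = _mm_set1_ph(2.0);
+///     let b = _mm_set1_ph(3.0);
+///     let c = _mm_set1_ph(1.0);
+///     let r = _mm_fmsubadd_ph(a, b, c);
+///     // even elements: 2.0 * 3.0 + 1.0 == 7.0; odd elements: 2.0 * 3.0 - 1.0 == 5.0
+/// }
+/// ```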
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ vfmaddsubph_128(a, b, simd_neg(c))
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+ simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), _mm_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+ vfmaddsubph_256(a, b, simd_neg(c))
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask3_fmsubadd_ph(
+ a: __m256h,
+ b: __m256h,
+ c: __m256h,
+ k: __mmask16,
+) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_fmsubadd_ph(
+ k: __mmask16,
+ a: __m256h,
+ b: __m256h,
+ c: __m256h,
+) -> __m256h {
+ simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), _mm256_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+ _mm512_fmsubadd_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fmsubadd_ph(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+ k: __mmask32,
+) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmsubadd_ph(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), _mm512_setzero_ph())
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vfmaddsubph_512(a, b, simd_neg(c), ROUNDING)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from a when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ k: __mmask32,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+    simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), a)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
+/// (the element is copied from c when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>(
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+ k: __mmask32,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+    simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), c)
+}
+
+/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
+/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
+/// (the element is zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+ c: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(
+ k,
+        _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c),
+ _mm512_setzero_ph(),
+ )
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
+ fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
+ #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
+ fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
+
+ #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
+ fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
+ fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
+ fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
+ fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
+ fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
+ fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
+ fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
+ fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
+ fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
+ fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
+ fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
+ fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
+ fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
+ fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
+ fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
+ fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
+ fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
+ fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
+ fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
+ fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
+ fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
+ fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
+ fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
+ fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
+ fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
+ fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
+ fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
+ fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
+ fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
+ -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
+ fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
+ -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
+ fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
+ fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
+ fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.fma.f16"]
+ fn fmaf16(a: f16, b: f16, c: f16) -> f16; // TODO: use `crate::intrinsics::fmaf16` when it's available
+ #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
+ fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
+
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
+ fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
+ fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
+ fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::x86::*;
+ use crate::mem::transmute;
+ use crate::ptr::{addr_of, addr_of_mut};
+ use stdarch_test::simd_test;
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
+ _mm_setr_ph(re, im, re, im, re, im, re, im)
+ }
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
+ _mm256_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ )
+ }
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
+ _mm512_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ re, im, re, im, re, im, re, im, re, im,
+ )
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_ph() {
+ let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set_ph() {
+ let r = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_setr_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set_ph() {
+ let r = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_setr_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_sh() {
+ let r = _mm_set_sh(1.0);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set1_ph() {
+ let r = _mm_set1_ph(1.0);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set1_ph() {
+ let r = _mm256_set1_ph(1.0);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set1_ph() {
+ let r = _mm512_set1_ph(1.0);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_setr_ph() {
+ let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_setr_ph() {
+ let r = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setr_ph() {
+ let r = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_setzero_ph() {
+ let r = _mm_setzero_ph();
+ let e = _mm_set1_ph(0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_setzero_ph() {
+ let r = _mm256_setzero_ph();
+ let e = _mm256_set1_ph(0.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setzero_ph() {
+ let r = _mm512_setzero_ph();
+ let e = _mm512_set1_ph(0.0);
+ assert_eq_m512h(r, e);
+ }
+
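+    // The cast tests work on bit patterns rather than conversions: 0x3c00 is 1.0 in
+    // IEEE binary16, 0x3f800000 is 1.0 as `f32` and 0x3ff0000000000000 is 1.0 as
+    // `f64`, so reinterpreting those integer patterns must compare equal to set1(1.0).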
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castsi128_ph() {
+ let a = _mm_set1_epi16(0x3c00);
+ let r = _mm_castsi128_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castsi256_ph() {
+ let a = _mm256_set1_epi16(0x3c00);
+ let r = _mm256_castsi256_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castsi512_ph() {
+ let a = _mm512_set1_epi16(0x3c00);
+ let r = _mm512_castsi512_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_si128() {
+ let a = _mm_set1_ph(1.0);
+ let r = _mm_castph_si128(a);
+ let e = _mm_set1_epi16(0x3c00);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_si256() {
+ let a = _mm256_set1_ph(1.0);
+ let r = _mm256_castph_si256(a);
+ let e = _mm256_set1_epi16(0x3c00);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_si512() {
+ let a = _mm512_set1_ph(1.0);
+ let r = _mm512_castph_si512(a);
+ let e = _mm512_set1_epi16(0x3c00);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castps_ph() {
+ let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
+ let r = _mm_castps_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castps_ph() {
+ let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castps_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castps_ph() {
+ let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castps_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_ps() {
+ let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
+ let r = _mm_castph_ps(a);
+ let e = _mm_set1_ps(1.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_ps() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
+ let r = _mm256_castph_ps(a);
+ let e = _mm256_set1_ps(1.0);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_ps() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
+ let r = _mm512_castph_ps(a);
+ let e = _mm512_set1_ps(1.0);
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castpd_ph() {
+ let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
+ let r = _mm_castpd_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castpd_ph() {
+ let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castpd_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castpd_ph() {
+ let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castpd_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_pd() {
+ let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
+ let r = _mm_castph_pd(a);
+ let e = _mm_set1_pd(1.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_pd() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
+ let r = _mm256_castph_pd(a);
+ let e = _mm256_set1_pd(1.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_pd() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
+ let r = _mm512_castph_pd(a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph256_ph128() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm256_castph256_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph128() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph256() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_castph128_ph256(a);
+ assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_castph128_ph512(a);
+ assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_castph256_ph512(a);
+ assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_zextph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_zextph128_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_zextph128_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_zextph256_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
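+    // The scalar compares operate on the lowest `f16` lane only and return a single
+    // mask bit; `_CMP_EQ_OQ` is the ordered, non-signaling equality predicate.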
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cmp_round_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cmp_round_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
+ assert_eq!(r, 0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
+ assert_eq!(r, 0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comige_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comile_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comilt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomige_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomile_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomilt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
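+    // `_mm*_load_ph`/`_mm*_store_ph` take a pointer to a whole, naturally aligned
+    // vector (hence the `addr_of!(..).cast()` calls), while the `loadu`/`storeu`
+    // variants accept plain, possibly unaligned `f16` arrays.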
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_load_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_load_ph(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_load_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_load_ph(addr_of!(a).cast());
+ assert_eq_m256h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_load_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_load_ph(addr_of!(a).cast());
+ assert_eq_m512h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_load_sh(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let src = _mm_set_sh(2.);
+ let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
+ assert_eq_m128h(src, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_loadu_ph() {
+ let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
+ let r = _mm_loadu_ph(array.as_ptr());
+ let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ ];
+ let r = _mm256_loadu_ph(array.as_ptr());
+ let e = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ ];
+ let r = _mm512_loadu_ph(array.as_ptr());
+ let e = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
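+    // `_mm_move_sh` copies the lowest `f16` element of `b` into the lowest lane of
+    // the result and keeps the upper seven lanes of `a`; the masked forms take that
+    // lane from `src` (or zero it) when the mask bit is clear.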
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_move_sh(a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let src = _mm_set_sh(10.0);
+ let r = _mm_mask_move_sh(src, 0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_maskz_move_sh(0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_store_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_store_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let mut b = _mm256_setzero_ph();
+ _mm256_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m256h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_store_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let mut b = _mm512_setzero_ph();
+ _mm512_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m512h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_sh(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_storeu_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut array = [0.0; 8];
+ _mm_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_storeu_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let mut array = [0.0; 16];
+ _mm256_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_storeu_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let mut array = [0.0; 32];
+ _mm512_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+ }
+
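+    // Convention for the masked arithmetic tests below: mask bit 0 selects the
+    // lowest element, the `mask_*` variants keep `src` where a bit is clear, and the
+    // `maskz_*` variants zero those lanes instead.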
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_add_ph(a, b);
+ let e = _mm_set1_ph(9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_add_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_add_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_add_ph(a, b);
+ let e = _mm256_set1_ph(17.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_add_ph(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
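+    // `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` selects round-to-nearest-even
+    // with floating-point exceptions suppressed, matching the default behaviour of
+    // the non-`round` intrinsics, so the expected values are unchanged.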
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_sh(a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_sh(src, 1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_add_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_add_sh(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_sub_ph(a, b);
+ let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_sub_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_sub_ph(a, b);
+ let e = _mm256_set_ph(
+ -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
+ 15.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_ph(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_sub_sh(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_sh(src, 1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_sub_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_sub_sh(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_mul_ph(a, b);
+ let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_mul_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_mul_ph(a, b);
+ let e = _mm256_set_ph(
+ 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
+ 30.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_mul_ph(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_mul_sh(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_sh(src, 1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_mul_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_mul_sh(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_div_ph(a, b);
+ let e = _mm_set1_ph(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
+ let r = _mm_mask_div_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_maskz_div_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_div_ph(a, b);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let src = _mm256_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0,
+ );
+ let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_ph(a, b);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let src = _mm512_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
+ );
+ let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let src = _mm512_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
+ );
+ let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_div_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_div_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_div_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_div_sh(a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_div_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_div_sh(src, 1, a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_div_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_div_sh(1, a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
+ }
+
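+    // Complex multiply tests: every two adjacent `f16` lanes form one complex
+    // number, and one mask bit covers a whole (re, im) pair. With a = b = 0 + 1i the
+    // product is (0*0 - 1*1) + (0*1 + 1*0)i = -1 + 0i, which is the expected value.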
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_mul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
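+        // For _pch intrinsics each mask bit covers a whole complex lane (one f16 pair):
+        // 0b0101 selects complex lanes 0 and 2, while lanes 1 and 3 keep the values from src.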
+ let r = _mm_mask_mul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_mul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_mul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ );
+ let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_mul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ );
+ assert_eq_m256h(r, e);
+ }
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_setr_ph() {
- let r = _mm512_setr_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ unsafe fn test_mm512_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let e = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
+ let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_setzero_ph() {
- let r = _mm_setzero_ph();
- let e = _mm_set1_ph(0.0);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ );
+ assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_setzero_ph() {
- let r = _mm256_setzero_ph();
- let e = _mm256_set1_ph(0.0);
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_setzero_ph() {
- let r = _mm512_setzero_ph();
- let e = _mm512_set1_ph(0.0);
+ unsafe fn test_mm512_mask_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b0101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castsi128_ph() {
- let a = _mm_set1_epi16(0x3c00);
- let r = _mm_castsi128_ph(a);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm512_maskz_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
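+        // Scalar complex multiply: only the low (re, im) pair is computed, giving -1 + 0i;
+        // the remaining six f16 halves are copied from a.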
+ let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castsi256_ph() {
- let a = _mm256_set1_epi16(0x3c00);
- let r = _mm256_castsi256_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_mask_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castsi512_ph() {
- let a = _mm512_set1_epi16(0x3c00);
- let r = _mm512_castsi512_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_maskz_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r =
+ _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_si128() {
- let a = _mm_set1_ph(1.0);
- let r = _mm_castph_si128(a);
- let e = _mm_set1_epi16(0x3c00);
- assert_eq_m128i(r, e);
+ unsafe fn test_mm_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_mul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_si256() {
- let a = _mm256_set1_ph(1.0);
- let r = _mm256_castph_si256(a);
- let e = _mm256_set1_epi16(0x3c00);
- assert_eq_m256i(r, e);
+ unsafe fn test_mm_mask_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_mul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_si512() {
- let a = _mm512_set1_ph(1.0);
- let r = _mm512_castph_si512(a);
- let e = _mm512_set1_epi16(0x3c00);
- assert_eq_m512i(r, e);
+ unsafe fn test_mm_maskz_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_mul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
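+        // fmul_pch performs the same packed complex multiply as mul_pch above,
+        // so every lane is again -1 + 0i.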
+ let r = _mm_fmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_fmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_fmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ );
+ let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castps_ph() {
- let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
- let r = _mm_castps_ph(a);
- let e = _mm_set1_ph(1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_maskz_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castps_ph() {
- let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castps_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm512_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castps_ph() {
- let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castps_ph(a);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_mask_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b0101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_ps() {
- let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
- let r = _mm_castph_ps(a);
- let e = _mm_set1_ps(1.0);
- assert_eq_m128(r, e);
+ unsafe fn test_mm512_maskz_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_ps() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
- let r = _mm256_castph_ps(a);
- let e = _mm256_set1_ps(1.0);
- assert_eq_m256(r, e);
+ unsafe fn test_mm_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_ps() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
- let r = _mm512_castph_ps(a);
- let e = _mm512_set1_ps(1.0);
- assert_eq_m512(r, e);
+ unsafe fn test_mm_mask_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castpd_ph() {
- let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
- let r = _mm_castpd_ph(a);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm_maskz_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r =
+ _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castpd_ph() {
- let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castpd_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castpd_ph() {
- let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castpd_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_mask_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_pd() {
- let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
- let r = _mm_castph_pd(a);
- let e = _mm_set1_pd(1.0);
- assert_eq_m128d(r, e);
+ unsafe fn test_mm_maskz_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_fmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_pd() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
- let r = _mm256_castph_pd(a);
- let e = _mm256_set1_pd(1.0);
- assert_eq_m256d(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
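+        // cmul_pch multiplies a by the complex conjugate of b:
+        // (0 + 1i) * conj(0 - 1i) = (0 + 1i) * (0 + 1i) = -1 + 0i.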
+ let r = _mm_cmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_pd() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
- let r = _mm512_castph_pd(a);
- let e = _mm512_set1_pd(1.0);
- assert_eq_m512d(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph256_ph128() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- let r = _mm256_castph256_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_cmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph128() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
- );
- let r = _mm512_castph512_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_cmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph256() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm512_castph512_ph256(a);
+ let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_castph128_ph256(a);
- assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ unsafe fn test_mm512_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_castph128_ph512(a);
- assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ unsafe fn test_mm512_mask_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ unsafe fn test_mm512_maskz_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
- let r = _mm512_castph256_ph512(a);
- assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_zextph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_zextph128_ph256(a);
- let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm512_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_zextph128_ph512(a);
+ unsafe fn test_mm512_mask_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b0101010101010101,
+ a,
+ b,
+ );
let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ unsafe fn test_mm512_maskz_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
+ a,
+ b,
);
- let r = _mm512_zextph256_ph512(a);
let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmp_round_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmp_round_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
- assert_eq!(r, 0);
+ unsafe fn test_mm_mask_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmp_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm_maskz_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_cmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmp_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
- assert_eq!(r, 0);
+ unsafe fn test_mm_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm_mask_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm_maskz_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comieq_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comieq_sh(a, b);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
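+        // Like cmul_pch above, fcmul_pch multiplies a by the conjugate of b,
+        // so every complex lane is -1 + 0i.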
+ let r = _mm_fcmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comige_sh(a, b);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comigt_sh(a, b);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_fcmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comile_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_comile_sh(a, b);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_fcmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comilt_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_comilt_sh(a, b);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ );
+ let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comineq_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_comineq_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomieq_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomieq_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mask_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomige_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomigt_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomile_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_ucomile_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mask_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b0101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomilt_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_ucomilt_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomineq_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_ucomineq_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_load_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_load_ph(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ unsafe fn test_mm_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_load_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_load_ph(addr_of!(a).cast());
- assert_eq_m256h(a, b);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fcmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_load_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_load_ph(addr_of!(a).cast());
- assert_eq_m512h(a, b);
+ unsafe fn test_mm_maskz_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_fcmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_load_sh(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ unsafe fn test_mm_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_load_sh() {
- let a = _mm_set_sh(1.0);
- let src = _mm_set_sh(2.);
- let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
- assert_eq_m128h(src, b);
+ unsafe fn test_mm_mask_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
- assert_eq_m128h(_mm_setzero_ph(), b);
+ unsafe fn test_mm_maskz_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_loadu_ph() {
- let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
- let r = _mm_loadu_ph(array.as_ptr());
- let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ unsafe fn test_mm_abs_ph() {
+ let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
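+        // abs_ph yields the absolute value of every f16 lane.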
+ let r = _mm_abs_ph(a);
+ let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_loadu_ph() {
- let array = [
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- ];
- let r = _mm256_loadu_ph(array.as_ptr());
- let e = _mm256_setr_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ unsafe fn test_mm256_abs_ph() {
+ let a = _mm256_set_ph(
+ -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
+ -14.0,
+ );
+ let r = _mm256_abs_ph(a);
+ let e = _mm256_set_ph(
+ 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_loadu_ph() {
- let array = [
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- ];
- let r = _mm512_loadu_ph(array.as_ptr());
- let e = _mm512_setr_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ unsafe fn test_mm512_abs_ph() {
+ let a = _mm512_set_ph(
+ -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
+ -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
+ 27.0, -28.0, 29.0, -30.0,
+ );
+ let r = _mm512_abs_ph(a);
+ let e = _mm512_set_ph(
+ 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
+ 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
+ 29.0, 30.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_move_sh(a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_conj_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
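+        // Conjugation negates the imaginary half of each complex lane: (0, 1) becomes (0, -1).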
+ let r = _mm_conj_pch(a);
+ let e = _mm_set1_pch(0.0, -1.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let src = _mm_set_sh(10.0);
- let r = _mm_mask_move_sh(src, 0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_conj_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_conj_pch(src, 0b0101, a);
+ let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_maskz_move_sh(0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_conj_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_conj_pch(0b0101, a);
+ let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_store_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut b = _mm_setzero_ph();
- _mm_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
+ unsafe fn test_mm256_conj_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_conj_pch(a);
+ let e = _mm256_set1_pch(0.0, -1.0);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_store_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ unsafe fn test_mm256_mask_conj_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let mut b = _mm256_setzero_ph();
- _mm256_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m256h(a, b);
+ let r = _mm256_mask_conj_pch(src, 0b01010101, a);
+ let e = _mm256_setr_ph(
+ 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_store_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_conj_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_conj_pch(0b01010101, a);
+ let e = _mm256_setr_ph(
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
);
- let mut b = _mm512_setzero_ph();
- _mm512_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m512h(a, b);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_store_sh() {
- let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_store_sh(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
+ unsafe fn test_mm512_conj_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_conj_pch(a);
+ let e = _mm512_set1_pch(0.0, -1.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_store_sh() {
- let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
- assert_eq_m128h(_mm_setzero_ph(), b);
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
- assert_eq_m128h(a, b);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_storeu_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut array = [0.0; 8];
- _mm_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_storeu_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ unsafe fn test_mm512_mask_conj_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let mut array = [0.0; 16];
- _mm256_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
+ let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
+ let e = _mm512_setr_ph(
+ 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
+ 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
+ 33.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_storeu_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ unsafe fn test_mm512_maskz_conj_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
+ let e = _mm512_setr_ph(
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
);
- let mut array = [0.0; 32];
- _mm512_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_add_ph(a, b);
- let e = _mm_set1_ph(9.0);
+ unsafe fn test_mm_fmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
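+        // Complex fused multiply-add: (0 + 1i) * (0 + 2i) + (0 + 3i) = -2 + 3i in every lane.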
+ let r = _mm_fmadd_pch(a, b, c);
+ let e = _mm_set1_pch(-2.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_add_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ unsafe fn test_mm_mask_fmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
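+        // Masked-out complex lanes keep a, i.e. (0, 1); selected lanes get the fused result (-2, 3).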
+ let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_add_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
+ unsafe fn test_mm_mask3_fmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
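+        // The mask3 variant keeps c, i.e. (0, 3), in masked-out complex lanes.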
+ let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_add_ph(a, b);
- let e = _mm256_set1_ph(17.0);
+ unsafe fn test_mm_maskz_fmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_fmadd_pch(a, b, c);
+ let e = _mm256_set1_pch(-2.0, 3.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
- );
- let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ unsafe fn test_mm256_mask_fmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
+ let e = _mm256_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ unsafe fn test_mm256_mask3_fmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
+ let e = _mm256_setr_ph(
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
- let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_fmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
+ let e = _mm256_setr_ph(
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_add_ph(a, b);
- let e = _mm512_set1_ph(33.0);
+ unsafe fn test_mm512_fmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_fmadd_pch(a, b, c);
+ let e = _mm512_set1_pch(-2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ unsafe fn test_mm512_mask_fmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ unsafe fn test_mm512_mask3_fmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
+ unsafe fn test_mm512_maskz_fmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
- let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(33.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_fmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r =
+ _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pch(-2.0, 3.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
+ 0b0101010101010101,
b,
+ c,
);
- let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ unsafe fn test_mm512_mask3_fmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
+ b,
+ c,
+ 0b0101010101010101,
);
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
- let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_fmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
+ c,
);
- let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(3.0);
+ unsafe fn test_mm_fmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
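+ // Scalar complex FMA: only the lowest complex pair is computed ((0 + 1i) * (0 + 2i) + (0 + 3i) = -2 + 3i); the upper elements are copied from a.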
+ let r = _mm_fmadd_sch(a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
+ unsafe fn test_mm_mask_fmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fmadd_sch(a, 0, b, c);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(3.0);
+ let r = _mm_mask_fmadd_sch(a, 1, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
+ unsafe fn test_mm_mask3_fmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmadd_sch(a, b, c, 0);
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
assert_eq_m128h(r, e);
- let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_mask3_fmadd_sch(a, b, c, 1);
+ let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_add_sh(a, b);
- let e = _mm_set_sh(3.0);
+ unsafe fn test_mm_maskz_fmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fmadd_sch(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_sch(1, a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_fmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_fmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
+ unsafe fn test_mm_mask3_fmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_add_sh(src, 1, a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_add_sh(0, a, b);
- let e = _mm_set_sh(0.0);
+ unsafe fn test_mm_maskz_fmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_add_sh(1, a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_sub_ph(a, b);
- let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ unsafe fn test_mm_fcmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
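+ // Conjugate complex FMA (one operand is conjugated): for a = i, b = 2i, c = 3i every complex lane evaluates to 2 + 3i.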
+ let r = _mm_fcmadd_pch(a, b, c);
+ let e = _mm_set1_pch(2.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ unsafe fn test_mm_mask_fcmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_sub_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ unsafe fn test_mm_mask3_fcmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_sub_ph(a, b);
- let e = _mm256_set_ph(
- -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
- 15.0,
- );
+ unsafe fn test_mm_maskz_fcmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fcmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_fcmadd_pch(a, b, c);
+ let e = _mm256_set1_pch(2.0, 3.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
- );
- let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ unsafe fn test_mm256_mask_fcmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
+ let e = _mm256_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ unsafe fn test_mm256_mask3_fcmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
+ let e = _mm256_setr_ph(
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
- let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_fcmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
+ let e = _mm256_setr_ph(
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_sub_ph(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
- );
+ unsafe fn test_mm512_fcmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_fcmadd_pch(a, b, c);
+ let e = _mm512_set1_pch(2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ unsafe fn test_mm512_mask_fcmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
+ 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ unsafe fn test_mm512_mask3_fcmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
+ 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
+ unsafe fn test_mm512_maskz_fcmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
+ 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_fcmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r =
+ _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pch(2.0, 3.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fcmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
+ 0b0101010101010101,
b,
+ c,
);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
+ 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ unsafe fn test_mm512_mask3_fcmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
+ b,
+ c,
+ 0b0101010101010101,
);
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
+ 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
- let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_fcmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
+ c,
);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
+ 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_fcmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fcmadd_sch(a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
+ unsafe fn test_mm_mask_fcmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fcmadd_sch(a, 0, b, c);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(-1.0);
+ let r = _mm_mask_fcmadd_sch(a, 1, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask3_fcmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
+ let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fcmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fcmadd_sch(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fcmadd_sch(1, a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_fcmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_sh(a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_mask_fcmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
+ unsafe fn test_mm_mask3_fcmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_sub_sh(src, 1, a, b);
- let e = _mm_set_sh(-1.0);
+ let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_sub_sh(0, a, b);
- let e = _mm_set_sh(0.0);
+ unsafe fn test_mm_maskz_fcmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_sub_sh(1, a, b);
- let e = _mm_set_sh(-1.0);
+ let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_mul_ph(a, b);
- let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ unsafe fn test_mm_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
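+ // Plain (real) packed f16 FMA: 1.0 * 2.0 + 3.0 = 5.0 in every lane.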
+ let r = _mm_fmadd_ph(a, b, c);
+ let e = _mm_set1_ph(5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ unsafe fn test_mm_mask_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_mul_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ unsafe fn test_mm_mask3_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_mul_ph(a, b);
+ unsafe fn test_mm_maskz_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmadd_ph(a, b, c);
+ let e = _mm256_set1_ph(5.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
let e = _mm256_set_ph(
- 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
- 30.0, 16.0,
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
- );
- let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
+ unsafe fn test_mm256_mask3_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
let e = _mm256_set_ph(
- 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
+ unsafe fn test_mm256_maskz_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
let e = _mm256_set_ph(
- 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_mul_ph(a, b);
+ unsafe fn test_mm512_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmadd_ph(a, b, c);
+ let e = _mm512_set1_ph(5.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
+ 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_mask3_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
+ 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_maskz_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
+ 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
+ unsafe fn test_mm512_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(5.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
+ 0b01010101010101010101010101010101,
+ b,
+ c,
);
- let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
+ 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_mask3_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b01010101010101010101010101010101,
);
let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
+ 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
+ c,
);
let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
+ 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
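+ // Scalar f16 FMA: lane 0 is 1.0 * 2.0 + 3.0 = 5.0, the remaining lanes are copied from a.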
+ let r = _mm_fmadd_sh(a, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
+ unsafe fn test_mm_mask_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmadd_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(2.0);
+ let r = _mm_mask_fmadd_sh(a, 1, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask3_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmadd_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_sh(a, b, c, 1);
+ let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
+ unsafe fn test_mm_maskz_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmadd_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(2.0);
+ let r = _mm_maskz_fmadd_sh(1, a, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_sh(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
+ unsafe fn test_mm_mask_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_mul_sh(src, 1, a, b);
- let e = _mm_set_sh(2.0);
+ let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_mul_sh(0, a, b);
- let e = _mm_set_sh(0.0);
+ unsafe fn test_mm_mask3_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_mul_sh(1, a, b);
- let e = _mm_set_sh(2.0);
+ let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_div_ph() {
+ unsafe fn test_mm_fmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
- let r = _mm_div_ph(a, b);
- let e = _mm_set1_ph(0.5);
+ let c = _mm_set1_ph(3.0);
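+ // Fused multiply-subtract: 1.0 * 2.0 - 3.0 = -1.0 in every lane.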
+ let r = _mm_fmsub_ph(a, b, c);
+ let e = _mm_set1_ph(-1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_div_ph() {
+ unsafe fn test_mm_mask_fmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
- let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
- let r = _mm_mask_div_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_div_ph() {
+ unsafe fn test_mm_mask3_fmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
- let r = _mm_maskz_div_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_div_ph() {
+ unsafe fn test_mm_maskz_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
- let r = _mm256_div_ph(a, b);
- let e = _mm256_set1_ph(0.5);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmsub_ph(a, b, c);
+ let e = _mm256_set1_ph(-1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_div_ph() {
+ unsafe fn test_mm256_mask_fmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
- let src = _mm256_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0,
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
- let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
let e = _mm256_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_div_ph() {
+ unsafe fn test_mm256_maskz_fmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
- let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_ph() {
+ unsafe fn test_mm512_fmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_ph(a, b);
- let e = _mm512_set1_ph(0.5);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmsub_ph(a, b, c);
+ let e = _mm512_set1_ph(-1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_ph() {
+ unsafe fn test_mm512_mask_fmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
- let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_ph() {
+ unsafe fn test_mm512_maskz_fmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_round_ph() {
+ unsafe fn test_mm512_fmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(0.5);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(-1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_round_ph() {
+ unsafe fn test_mm512_mask_fmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
- );
- let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
0b01010101010101010101010101010101,
+ b,
+ c,
+ );
+ let e = _mm512_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b01010101010101010101010101010101,
);
let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_round_ph() {
+ unsafe fn test_mm512_maskz_fmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
+ c,
);
let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m512h(r, e);
}
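+    // The scalar `_sh` variants operate only on the lowest f16 lane; the upper seven lanes
+    // of the result are copied from `a` (or from `c` in the `mask3` forms), which is why
+    // `a`, `b` and `c` use distinct filler values here.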
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmsub_sh(a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
+ unsafe fn test_mm_mask_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmsub_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(0.5);
+ let r = _mm_mask_fmsub_sh(a, 1, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
+ unsafe fn test_mm_mask3_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmsub_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(0.5);
+ let r = _mm_mask3_fmsub_sh(a, b, c, 1);
+ let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_sh(a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_maskz_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmsub_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmsub_sh(1, a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
+ unsafe fn test_mm_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_div_sh(src, 1, a, b);
- let e = _mm_set_sh(0.5);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_div_sh(0, a, b);
- let e = _mm_set_sh(0.0);
+ unsafe fn test_mm_mask3_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_div_sh(1, a, b);
- let e = _mm_set_sh(0.5);
+ let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
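+    // fnmadd computes `-(a * b) + c`; with a = 1, b = 2, c = 3 every active lane is 1,
+    // which also happens to equal `a`, so the masked expectations below are uniform.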
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_mul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fnmadd_ph(a, b, c);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_mul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_mul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ unsafe fn test_mm_mask_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_mul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_mul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ unsafe fn test_mm_mask3_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_mul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_maskz_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fnmadd_ph(a, b, c);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_mul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ unsafe fn test_mm256_mask_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
- let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_mul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_mul_pch(0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_mul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fnmadd_ph(a, b, c);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ unsafe fn test_mm512_mask_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
- let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm512_maskz_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r =
+ _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ unsafe fn test_mm512_mask_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
+ 0b01010101010101010101010101010101,
+ b,
+ c,
);
- let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b0101010101010101,
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ let e = _mm512_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_maskz_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
+ c,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmadd_sh(a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fnmadd_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fnmadd_sh(a, 1, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r =
- _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask3_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
+ let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_mul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_maskz_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmadd_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fnmadd_sh(1, a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_mul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_maskz_mul_sch(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask3_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
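+    // fnmsub computes `-(a * b) - c`; with a = 1, b = 2, c = 3 every active lane is -5.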
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_fmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_fnmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fnmsub_ph(a, b, c);
+ let e = _mm_set1_ph(-5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ unsafe fn test_mm_mask_fnmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_fmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ unsafe fn test_mm_mask3_fnmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_fmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_maskz_fnmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fnmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fnmsub_ph(a, b, c);
+ let e = _mm256_set1_ph(-5.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ unsafe fn test_mm256_mask_fnmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
- let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fnmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fnmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_fmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_fnmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fnmsub_ph(a, b, c);
+ let e = _mm512_set1_ph(-5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ unsafe fn test_mm512_mask_fnmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
- let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fnmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm512_maskz_fnmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_fnmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r =
+ _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(-5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ unsafe fn test_mm512_mask_fnmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
+ 0b01010101010101010101010101010101,
+ b,
+ c,
);
- let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b0101010101010101,
+ let e = _mm512_set_ph(
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fnmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ let e = _mm512_set_ph(
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_maskz_fnmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
+ c,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmsub_sh(a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fnmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fnmsub_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fnmsub_sh(a, 1, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r =
- _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask3_fnmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
+ let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_fmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_maskz_fnmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmsub_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fnmsub_sh(1, a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_maskz_fmul_sch(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fnmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask3_fnmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fnmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
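+    // fmaddsub alternates per lane: even-indexed lanes compute `a * b - c` (-1 here) and
+    // odd-indexed lanes compute `a * b + c` (5 here). With `_mm*_set_ph` listing the
+    // highest lane first, the expected pattern reads `5.0, -1.0, ...`.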
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_cmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_cmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_fmaddsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fmaddsub_ph(a, b, c);
+ let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_cmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ unsafe fn test_mm_mask_fmaddsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
+ let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_cmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_maskz_cmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ unsafe fn test_mm_mask3_fmaddsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
+ let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_cmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_cmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_fmaddsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
+ let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_cmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- );
- let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ unsafe fn test_mm256_fmaddsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmaddsub_ph(a, b, c);
+ let e = _mm256_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_cmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm256_mask_fmaddsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_cmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_cmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
- assert_eq_m512h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fmaddsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
+ let e = _mm256_set_ph(
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_cmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_fmaddsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
- assert_eq_m512h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_cmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm512_fmaddsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmaddsub_ph(a, b, c);
+ let e = _mm512_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_cmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_mask_fmaddsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_cmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b0101010101010101,
- a,
- b,
- );
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ unsafe fn test_mm512_mask3_fmaddsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
+ let e = _mm512_set_ph(
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_cmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
- a,
- b,
- );
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm512_maskz_fmaddsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_cmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_cmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_fmaddsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r =
+ _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_cmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_maskz_cmul_sch(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_mask_fmaddsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
+ 0b00110011001100110011001100110011,
+ b,
+ c,
+ );
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
+ b,
+ c,
+ 0b00110011001100110011001100110011,
+ );
+ let e = _mm512_set_ph(
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
+ unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00110011001100110011001100110011,
+ a,
+ b,
+ c,
);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ let e = _mm512_set_ph(
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ );
+ assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_cmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r =
- _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
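+    // fmsubadd computes `a * b + c` on even-indexed lanes (5 here) and `a * b - c` on
+    // odd-indexed lanes (-1 here), the opposite alternation to fmaddsub.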
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_fmsubadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fmsubadd_ph(a, b, c);
+ let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_fcmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_mask_fmsubadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
+ let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ unsafe fn test_mm_mask3_fmsubadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
+ let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_maskz_fcmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ unsafe fn test_mm_maskz_fmsubadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
+ let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_fcmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm256_fmsubadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmsubadd_ph(a, b, c);
+ let e = _mm256_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ unsafe fn test_mm256_mask_fmsubadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
- let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fmsubadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
+ let e = _mm256_set_ph(
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fmsubadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_fcmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_fmsubadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmsubadd_ph(a, b, c);
+ let e = _mm512_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ unsafe fn test_mm512_mask_fmsubadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm512_mask3_fmsubadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
+ let e = _mm512_set_ph(
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_maskz_fmsubadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ unsafe fn test_mm512_fmsubadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r =
+ _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
);
- let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b0101010101010101,
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fmsubadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
+ 0b00110011001100110011001100110011,
b,
+ c,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ let e = _mm512_set_ph(
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b00110011001100110011001100110011,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_fcmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fcmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_maskz_fcmul_sch(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
+ unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00110011001100110011001100110011,
+ a,
+ b,
+ c,
);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r =
- _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ let e = _mm512_set_ph(
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ );
+ assert_eq_m512h(r, e);
}
}
From e6a59102baa1ac32f5ea70895ca4b94e0a93f968 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Sat, 13 Jul 2024 12:47:28 +0530
Subject: [PATCH 05/11] AVX512FP16 Part 4: Math functions
Reciprocal, RSqrt, Sqrt, Max, Min
---
crates/core_arch/missing-x86.md | 78 -
crates/core_arch/src/x86/avx512fp16.rs | 8009 +++++++++++++++---------
2 files changed, 5039 insertions(+), 3048 deletions(-)
diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index 08b3ab9a18..c0b8aa1457 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -159,20 +159,12 @@
* [ ] [`_mm512_mask_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph)
* [ ] [`_mm512_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph)
* [ ] [`_mm512_mask_getmant_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph)
- * [ ] [`_mm512_mask_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ph)
- * [ ] [`_mm512_mask_max_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph)
- * [ ] [`_mm512_mask_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph)
- * [ ] [`_mm512_mask_min_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph)
- * [ ] [`_mm512_mask_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph)
* [ ] [`_mm512_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
* [ ] [`_mm512_mask_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
* [ ] [`_mm512_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph)
* [ ] [`_mm512_mask_roundscale_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph)
- * [ ] [`_mm512_mask_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt_ph)
* [ ] [`_mm512_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph)
* [ ] [`_mm512_mask_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
- * [ ] [`_mm512_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph)
- * [ ] [`_mm512_mask_sqrt_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph)
* [ ] [`_mm512_maskz_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
* [ ] [`_mm512_maskz_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
* [ ] [`_mm512_maskz_cvt_roundepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
@@ -221,27 +213,14 @@
* [ ] [`_mm512_maskz_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph)
* [ ] [`_mm512_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph)
* [ ] [`_mm512_maskz_getmant_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph)
- * [ ] [`_mm512_maskz_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ph)
- * [ ] [`_mm512_maskz_max_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph)
- * [ ] [`_mm512_maskz_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph)
- * [ ] [`_mm512_maskz_min_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph)
- * [ ] [`_mm512_maskz_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph)
* [ ] [`_mm512_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
* [ ] [`_mm512_maskz_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
* [ ] [`_mm512_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph)
* [ ] [`_mm512_maskz_roundscale_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph)
- * [ ] [`_mm512_maskz_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt_ph)
* [ ] [`_mm512_maskz_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph)
* [ ] [`_mm512_maskz_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph)
- * [ ] [`_mm512_maskz_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph)
- * [ ] [`_mm512_maskz_sqrt_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph)
- * [ ] [`_mm512_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph)
- * [ ] [`_mm512_max_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph)
- * [ ] [`_mm512_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph)
- * [ ] [`_mm512_min_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph)
* [ ] [`_mm512_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
* [ ] [`_mm512_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
- * [ ] [`_mm512_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph)
* [ ] [`_mm512_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph)
* [ ] [`_mm512_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph)
* [ ] [`_mm512_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph)
@@ -250,12 +229,9 @@
* [ ] [`_mm512_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph)
* [ ] [`_mm512_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph)
* [ ] [`_mm512_roundscale_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph)
- * [ ] [`_mm512_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt_ph)
* [ ] [`_mm512_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph)
* [ ] [`_mm512_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph)
* [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
- * [ ] [`_mm512_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph)
- * [ ] [`_mm512_sqrt_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph)
* [ ] [`_mm_cvt_roundi32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
* [ ] [`_mm_cvt_roundi64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
* [ ] [`_mm_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
@@ -309,16 +285,12 @@
* [ ] [`_mm_mask_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
* [ ] [`_mm_mask_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
* [ ] [`_mm_mask_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
- * [ ] [`_mm_mask_rcp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh)
* [ ] [`_mm_mask_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
* [ ] [`_mm_mask_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
* [ ] [`_mm_mask_roundscale_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh)
* [ ] [`_mm_mask_roundscale_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh)
- * [ ] [`_mm_mask_rsqrt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_sh)
* [ ] [`_mm_mask_scalef_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh)
* [ ] [`_mm_mask_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
- * [ ] [`_mm_mask_sqrt_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh)
- * [ ] [`_mm_mask_sqrt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh)
* [ ] [`_mm_maskz_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
* [ ] [`_mm_maskz_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
* [ ] [`_mm_maskz_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
@@ -331,27 +303,19 @@
* [ ] [`_mm_maskz_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
* [ ] [`_mm_maskz_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
* [ ] [`_mm_maskz_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
- * [ ] [`_mm_maskz_rcp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh)
* [ ] [`_mm_maskz_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
* [ ] [`_mm_maskz_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
* [ ] [`_mm_maskz_roundscale_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh)
* [ ] [`_mm_maskz_roundscale_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh)
- * [ ] [`_mm_maskz_rsqrt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_sh)
* [ ] [`_mm_maskz_scalef_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh)
* [ ] [`_mm_maskz_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
- * [ ] [`_mm_maskz_sqrt_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh)
- * [ ] [`_mm_maskz_sqrt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh)
- * [ ] [`_mm_rcp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh)
* [ ] [`_mm_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
* [ ] [`_mm_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
* [ ] [`_mm_roundscale_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh)
* [ ] [`_mm_roundscale_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh)
- * [ ] [`_mm_rsqrt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_sh)
* [ ] [`_mm_scalef_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
* [ ] [`_mm_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
* [ ] [`_mm_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pch)
- * [ ] [`_mm_sqrt_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh)
- * [ ] [`_mm_sqrt_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh)
@@ -410,14 +374,9 @@
* [ ] [`_mm256_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
* [ ] [`_mm256_mask_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph)
* [ ] [`_mm256_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
- * [ ] [`_mm256_mask_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph)
- * [ ] [`_mm256_mask_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph)
- * [ ] [`_mm256_mask_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph)
* [ ] [`_mm256_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
* [ ] [`_mm256_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
- * [ ] [`_mm256_mask_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph)
* [ ] [`_mm256_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
- * [ ] [`_mm256_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph)
* [ ] [`_mm256_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
* [ ] [`_mm256_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
* [ ] [`_mm256_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
@@ -442,28 +401,18 @@
* [ ] [`_mm256_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
* [ ] [`_mm256_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
* [ ] [`_mm256_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
- * [ ] [`_mm256_maskz_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph)
- * [ ] [`_mm256_maskz_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph)
- * [ ] [`_mm256_maskz_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph)
* [ ] [`_mm256_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
* [ ] [`_mm256_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
- * [ ] [`_mm256_maskz_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph)
* [ ] [`_mm256_maskz_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph)
- * [ ] [`_mm256_maskz_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph)
- * [ ] [`_mm256_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph)
- * [ ] [`_mm256_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph)
* [ ] [`_mm256_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
* [ ] [`_mm256_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
- * [ ] [`_mm256_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph)
* [ ] [`_mm256_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph)
* [ ] [`_mm256_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph)
* [ ] [`_mm256_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph)
* [ ] [`_mm256_reduce_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph)
* [ ] [`_mm256_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph)
* [ ] [`_mm256_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph)
- * [ ] [`_mm256_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph)
* [ ] [`_mm256_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
- * [ ] [`_mm256_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph)
* [ ] [`_mm_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
* [ ] [`_mm_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
* [ ] [`_mm_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
@@ -517,18 +466,9 @@
* [ ] [`_mm_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
* [ ] [`_mm_mask_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph)
* [ ] [`_mm_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph)
- * [ ] [`_mm_mask_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ph)
- * [ ] [`_mm_mask_max_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sh)
- * [ ] [`_mm_mask_max_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sh)
- * [ ] [`_mm_mask_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph)
- * [ ] [`_mm_mask_min_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh)
- * [ ] [`_mm_mask_min_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh)
- * [ ] [`_mm_mask_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph)
* [ ] [`_mm_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
* [ ] [`_mm_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
- * [ ] [`_mm_mask_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph)
* [ ] [`_mm_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
- * [ ] [`_mm_mask_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph)
* [ ] [`_mm_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
* [ ] [`_mm_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
* [ ] [`_mm_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
@@ -553,36 +493,18 @@
* [ ] [`_mm_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
* [ ] [`_mm_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
* [ ] [`_mm_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph)
- * [ ] [`_mm_maskz_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph)
- * [ ] [`_mm_maskz_max_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sh)
- * [ ] [`_mm_maskz_max_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sh)
- * [ ] [`_mm_maskz_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph)
- * [ ] [`_mm_maskz_min_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh)
- * [ ] [`_mm_maskz_min_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh)
- * [ ] [`_mm_maskz_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph)
* [ ] [`_mm_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
* [ ] [`_mm_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
- * [ ] [`_mm_maskz_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph)
* [ ] [`_mm_maskz_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph)
- * [ ] [`_mm_maskz_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph)
- * [ ] [`_mm_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph)
- * [ ] [`_mm_max_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh)
- * [ ] [`_mm_max_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh)
- * [ ] [`_mm_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph)
- * [ ] [`_mm_min_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh)
- * [ ] [`_mm_min_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh)
* [ ] [`_mm_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
* [ ] [`_mm_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
- * [ ] [`_mm_rcp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph)
* [ ] [`_mm_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph)
* [ ] [`_mm_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph)
* [ ] [`_mm_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph)
* [ ] [`_mm_reduce_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph)
* [ ] [`_mm_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph)
* [ ] [`_mm_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph)
- * [ ] [`_mm_rsqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph)
* [ ] [`_mm_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph)
- * [ ] [`_mm_sqrt_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph)
diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs
index 11e5f7d8e9..b30bc63ed4 100644
--- a/crates/core_arch/src/x86/avx512fp16.rs
+++ b/crates/core_arch/src/x86/avx512fp16.rs
@@ -7269,6 +7269,1177 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_ph(
)
}
+/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_rcp_ph(a: __m128h) -> __m128h {
+ _mm_mask_rcp_ph(_mm_undefined_ph(), 0xff, a)
+}
+
+/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`
+/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
+ vrcpph_128(a, src, k)
+}
+
+/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`
+/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h {
+ _mm_mask_rcp_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_rcp_ph(a: __m256h) -> __m256h {
+ _mm256_mask_rcp_ph(_mm256_undefined_ph(), 0xffff, a)
+}
+
+/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`
+/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
+ vrcpph_256(a, src, k)
+}
+
+/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`
+/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h {
+ _mm256_mask_rcp_ph(_mm256_setzero_ph(), k, a)
+}
+
+/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_rcp_ph(a: __m512h) -> __m512h {
+ _mm512_mask_rcp_ph(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`
+/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
+ vrcpph_512(a, src, k)
+}
+
+/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a`, and store the results in `dst`
+/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h {
+ _mm512_mask_rcp_ph(_mm512_setzero_ph(), k, a)
+}
+
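(Editorial note, not part of the patch: a test for the masked packed reciprocal could look like the sketch below, written in the style of the existing tests. It assumes the crate's `assert_eq_m128h` helper and compares against the exact reciprocal of 2.0; since `vrcpph` only guarantees a relative error below 1.5*2^-12, a real test may need a tolerance instead of exact equality.

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_rcp_ph() {
        let a = _mm_set1_ph(2.0);
        let src = _mm_set1_ph(10.0);
        // lanes 0..4 take the approximate reciprocal of 2.0, lanes 4..8 keep `src`
        let r = _mm_mask_rcp_ph(src, 0b00001111, a);
        let e = _mm_set_ph(10.0, 10.0, 10.0, 10.0, 0.5, 0.5, 0.5, 0.5);
        assert_eq_m128h(r, e);
    }
)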
+/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
+/// store the result in the lower element of dst, and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_rcp_sh(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
+/// store the result in the lower element of dst using writemask k (the element is copied from src when
+/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ vrcpsh(a, b, src, k)
+}
+
+/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
+/// store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
+/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrcpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_rcp_sh(_mm_setzero_ph(), k, a, b)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_rsqrt_ph(a: __m128h) -> __m128h {
+ _mm_mask_rsqrt_ph(_mm_undefined_ph(), 0xff, a)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
+ vrsqrtph_128(a, src, k)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
+ _mm_mask_rsqrt_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_rsqrt_ph(a: __m256h) -> __m256h {
+ _mm256_mask_rsqrt_ph(_mm256_undefined_ph(), 0xffff, a)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
+ vrsqrtph_256(a, src, k)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
+ _mm256_mask_rsqrt_ph(_mm256_setzero_ph(), k, a)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_rsqrt_ph(a: __m512h) -> __m512h {
+ _mm512_mask_rsqrt_ph(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
+ vrsqrtph_512(a, src, k)
+}
+
+/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
+/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
+ _mm512_mask_rsqrt_ph(_mm512_setzero_ph(), k, a)
+}
+
+/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
+/// element in b, store the result in the lower element of dst, and copy the upper 7 packed elements from a
+/// to the upper elements of dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrsqrtsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_rsqrt_sh(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
+/// element in b, store the result in the lower element of dst using writemask k (the element is copied from src
+/// when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrsqrtsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ vrsqrtsh(a, b, src, k)
+}
+
+/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
+/// element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when
+/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+/// The maximum relative error for this approximation is less than `1.5*2^-12`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrsqrtsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_rsqrt_sh(_mm_setzero_ph(), k, a, b)
+}
+
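(Editorial note, not part of the patch: for the scalar variants, only lane 0 of b is approximated; the remaining lanes are copied from a. A hedged usage sketch in the style of the existing tests, with the same exactness caveat as the packed reciprocal example above:

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_rsqrt_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_set1_ph(4.0);
        // lane 0 is ~1/sqrt(4.0) = 0.5; lanes 1..8 are copied from `a`
        let r = _mm_rsqrt_sh(a, b);
        let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }
)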
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_sqrt_ph(a: __m128h) -> __m128h {
+ simd_fsqrt(a)
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_sqrt_ph(a), src)
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_sqrt_ph(a), _mm_setzero_ph())
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_sqrt_ph(a: __m256h) -> __m256h {
+ simd_fsqrt(a)
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_sqrt_ph(a), src)
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_sqrt_ph(a), _mm256_setzero_ph())
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_sqrt_ph(a: __m512h) -> __m512h {
+ simd_fsqrt(a)
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_sqrt_ph(a), src)
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_sqrt_ph(a), _mm512_setzero_ph())
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vsqrtph_512(a, ROUNDING)
+}
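+
+// Usage sketch (illustrative): `ROUNDING` is a const generic, so it is passed
+// with turbofish syntax; a compound mode needs braces, e.g.
+//
+//     let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+//
+// Passing _MM_FROUND_CUR_DIRECTION instead uses the rounding mode from MXCSR.RC.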
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), src)
+}
+
+/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), _mm512_setzero_ph())
+}
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_sqrt_sh(_mm_undefined_ph(), 0xff, a, b)
+}
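+
+// Usage sketch (illustrative): only lane 0 is computed from `b`; the other
+// seven lanes are passed through from `a`, e.g.
+//
+//     let a = _mm_set1_ph(10.0);
+//     let b = _mm_set_sh(9.0);
+//     let r = _mm_sqrt_sh(a, b); // lane 0 is 3.0, lanes 1..8 are 10.0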
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask
+/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
+/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_sqrt_sh(_mm_setzero_ph(), k, a, b)
+}
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ _mm_mask_sqrt_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask
+/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vsqrtsh(a, b, src, k, ROUNDING)
+}
+
+/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
+/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ _mm_mask_sqrt_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
+/// value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h {
+ vmaxph_128(a, b)
+}
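+
+// Usage sketch (illustrative): an element-wise maximum; per the note above,
+// NaN and signed-zero inputs follow the vmaxph instruction rather than IEEE 754, e.g.
+//
+//     let a = _mm_setr_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
+//     let b = _mm_set1_ph(2.0);
+//     let r = _mm_max_ph(a, b); // 2.0, 5.0, 2.0, 5.0, ...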
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_max_ph(a, b), src)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_max_ph(a, b), _mm_setzero_ph())
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
+/// value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h {
+ vmaxph_256(a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_max_ph(a, b), src)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_max_ph(a, b), _mm256_setzero_ph())
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
+/// value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h {
+ _mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_max_ph(a, b), src)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_max_ph(a, b), _mm512_setzero_ph())
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
+ static_assert_sae!(SAE);
+ vmaxph_512(a, b, SAE)
+}
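+
+// Usage sketch (illustrative): `SAE` only controls exception suppression here
+// (a maximum has no rounding step), so a typical call is
+//
+//     let r = _mm512_max_round_ph::<_MM_FROUND_NO_EXC>(a, b);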
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
+/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_max_round_ph<const SAE: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+) -> __m512h {
+ static_assert_sae!(SAE);
+ simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), src)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
+/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_max_round_ph<const SAE: i32>(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+) -> __m512h {
+ static_assert_sae!(SAE);
+ simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), _mm512_setzero_ph())
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
+/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value
+/// when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_max_sh(_mm_undefined_ph(), 0xff, a, b)
+}
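+
+// Usage sketch (illustrative): lane 0 of the result is the maximum of the two
+// lower elements; the remaining lanes come from `a`, e.g.
+//
+//     let a = _mm_set_sh(1.0);
+//     let b = _mm_set_sh(2.0);
+//     let r = _mm_max_sh(a, b); // lane 0 is 2.0, lanes 1..8 are 0.0 (from `a`)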
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
+/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
+/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
+/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
+/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
+/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
+/// for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_max_sh(_mm_setzero_ph(), k, a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
+/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
+/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_max_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
+ static_assert_sae!(SAE);
+ _mm_mask_max_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
+/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
+/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
+/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_max_round_sh<const SAE: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_sae!(SAE);
+ vmaxsh(a, b, src, k, SAE)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
+/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
+/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
+/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
+/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_max_round_sh<const SAE: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_sae!(SAE);
+ _mm_mask_max_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
+/// when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h {
+ vminph_128(a, b)
+}
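+
+// Usage sketch (illustrative): element-wise minimum, with the same non-IEEE
+// handling of NaN and signed zero as `_mm_max_ph`, e.g.
+//
+//     let r = _mm_min_ph(_mm_set1_ph(1.0), _mm_set1_ph(2.0)); // all lanes 1.0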
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_min_ph(a, b), src)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph())
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
+/// when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h {
+ vminph_256(a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_min_ph(a, b), src)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ simd_select_bitmask(k, _mm256_min_ph(a, b), _mm256_setzero_ph())
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
+/// when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vminph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h {
+ _mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vminph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_min_ph(a, b), src)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
+/// NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vminph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ simd_select_bitmask(k, _mm512_min_ph(a, b), _mm512_setzero_ph())
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not
+/// follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_min_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
+ static_assert_sae!(SAE);
+ vminph_512(a, b, SAE)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
+/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_min_round_ph<const SAE: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+) -> __m512h {
+ static_assert_sae!(SAE);
+ simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), src)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
+/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
+/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_min_round_ph<const SAE: i32>(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+) -> __m512h {
+ static_assert_sae!(SAE);
+ simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), _mm512_setzero_ph())
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
+/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when
+/// inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_min_sh(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
+/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
+/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
+/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
+/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
+/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
+/// for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_min_sh(_mm_setzero_ph(), k, a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
+/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
+/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_min_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
+ static_assert_sae!(SAE);
+ _mm_mask_min_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
+/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
+/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
+/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_min_round_sh<const SAE: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_sae!(SAE);
+ vminsh(a, b, src, k, SAE)
+}
+
+/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
+/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
+/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
+/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
+/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_min_round_sh<const SAE: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_sae!(SAE);
+ _mm_mask_min_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
+}
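+
+// Implementation note (summary of the pattern above): the masked packed
+// variants combine the unmasked operation with a bitmask select, e.g.
+//
+//     simd_select_bitmask(k, _mm_min_ph(a, b), src)              // writemask
+//     simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph()) // zeromask
+//
+// while the 512-bit `_round` and scalar `_sh` forms call the LLVM intrinsics
+// declared in the `extern "C"` block below.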
+
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
@@ -7276,1941 +8447,2606 @@ extern "C" {
#[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
- #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
- fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
- fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
- fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
- fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
+ fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
+ fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
+ fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
+ fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
+ fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
+ fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
+ fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
+ fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
+ fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
+ fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
+ fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
+ fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
+ fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
+ fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
+ fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
+ fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
+ fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
+ fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
+ fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
+ fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
+ fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
+ fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
+ fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
+ fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
+ fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
+ fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
+ fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
+ fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
+ fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
+ -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
+ fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
+ -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
+ fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
+ fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
+ fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.fma.f16"]
+ fn fmaf16(a: f16, b: f16, c: f16) -> f16; // TODO: use `crate::intrinsics::fmaf16` when it's available
+ #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
+ fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
+
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
+ fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
+ fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
+ fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
+ fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
+ fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
+ fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
+ fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
+ fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
+ fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
+ fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
+ fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
+ fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
+ fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
+ fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
+ fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
+ fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
+ fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
+ fn vminph_128(a: __m128h, b: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
+ fn vminph_256(a: __m256h, b: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
+ fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
+ fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::x86::*;
+ use crate::mem::transmute;
+ use crate::ptr::{addr_of, addr_of_mut};
+ use stdarch_test::simd_test;
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
+ _mm_setr_ph(re, im, re, im, re, im, re, im)
+ }
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
+ _mm256_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ )
+ }
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
+ _mm512_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ re, im, re, im, re, im, re, im, re, im,
+ )
+ }
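+
+    // Note on the `*_set1_pch` helpers above (test-only, illustrative): the
+    // complex-number intrinsics treat each adjacent pair of `f16` lanes as one
+    // complex value, so the helpers broadcast an interleaved pattern, e.g.
+    //
+    //     let z = _mm_set1_pch(1.0, 2.0); // lanes: 1.0, 2.0, 1.0, 2.0, ...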
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_ph() {
+ let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set_ph() {
+ let r = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_setr_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set_ph() {
+ let r = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_setr_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_sh() {
+ let r = _mm_set_sh(1.0);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set1_ph() {
+ let r = _mm_set1_ph(1.0);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set1_ph() {
+ let r = _mm256_set1_ph(1.0);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set1_ph() {
+ let r = _mm512_set1_ph(1.0);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_setr_ph() {
+ let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_setr_ph() {
+ let r = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setr_ph() {
+ let r = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_setzero_ph() {
+ let r = _mm_setzero_ph();
+ let e = _mm_set1_ph(0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_setzero_ph() {
+ let r = _mm256_setzero_ph();
+ let e = _mm256_set1_ph(0.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setzero_ph() {
+ let r = _mm512_setzero_ph();
+ let e = _mm512_set1_ph(0.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castsi128_ph() {
+ let a = _mm_set1_epi16(0x3c00);
+ let r = _mm_castsi128_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castsi256_ph() {
+ let a = _mm256_set1_epi16(0x3c00);
+ let r = _mm256_castsi256_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castsi512_ph() {
+ let a = _mm512_set1_epi16(0x3c00);
+ let r = _mm512_castsi512_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_si128() {
+ let a = _mm_set1_ph(1.0);
+ let r = _mm_castph_si128(a);
+ let e = _mm_set1_epi16(0x3c00);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_si256() {
+ let a = _mm256_set1_ph(1.0);
+ let r = _mm256_castph_si256(a);
+ let e = _mm256_set1_epi16(0x3c00);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_si512() {
+ let a = _mm512_set1_ph(1.0);
+ let r = _mm512_castph_si512(a);
+ let e = _mm512_set1_epi16(0x3c00);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castps_ph() {
+ let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
+ let r = _mm_castps_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castps_ph() {
+ let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castps_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castps_ph() {
+ let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castps_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_ps() {
+ let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
+ let r = _mm_castph_ps(a);
+ let e = _mm_set1_ps(1.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_ps() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
+ let r = _mm256_castph_ps(a);
+ let e = _mm256_set1_ps(1.0);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_ps() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
+ let r = _mm512_castph_ps(a);
+ let e = _mm512_set1_ps(1.0);
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castpd_ph() {
+ let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
+ let r = _mm_castpd_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castpd_ph() {
+ let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castpd_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castpd_ph() {
+ let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castpd_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_pd() {
+ let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
+ let r = _mm_castph_pd(a);
+ let e = _mm_set1_pd(1.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_pd() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
+ let r = _mm256_castph_pd(a);
+ let e = _mm256_set1_pd(1.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_pd() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
+ let r = _mm512_castph_pd(a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph256_ph128() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm256_castph256_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph128() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph256() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph128_ph256() {
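+        // The upper 128 bits of the widening cast are undefined, so only the low half is
+        // compared after casting back down.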
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_castph128_ph256(a);
+ assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_castph128_ph512(a);
+ assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_castph256_ph512(a);
+ assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_zextph128_ph256() {
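+        // Unlike the plain cast, the zero-extending cast guarantees the upper lanes are 0.0,
+        // so the full result can be checked.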
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_zextph128_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_zextph128_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_zextph256_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cmp_round_sh_mask() {
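+        // _CMP_EQ_OQ is an ordered, non-signaling equality predicate and _MM_FROUND_NO_EXC suppresses
+        // exceptions; bit 0 of the returned mask reports the comparison of the lowest f16 elements.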
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cmp_round_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
+ assert_eq!(r, 0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
+ assert_eq!(r, 0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_round_sh() {
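+        // The comi family returns the comparison result as an i32: 1 if the predicate holds, 0 otherwise.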
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
- fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
- fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
- fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
- fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
- fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
- fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
- fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
- fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comige_sh(a, b);
+ assert_eq!(r, 1);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
- fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
- fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
- fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
- fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
- fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
- fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
- fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
- fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
- fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
- fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
- fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
- fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comile_sh(a, b);
+ assert_eq!(r, 1);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
- fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
- fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
- fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
- fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
- fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
- -> __m512;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
- fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
- -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
- fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
- fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comilt_sh(a, b);
+ assert_eq!(r, 1);
+ }
- #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
- fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.fma.f16"]
- fn fmaf16(a: f16, b: f16, c: f16) -> f16; // TODO: use `crate::intrinsics::fmaf16` when it's available
- #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
- fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
- fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
- fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
- fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
-}
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomige_sh(a, b);
+ assert_eq!(r, 1);
+ }
-#[cfg(test)]
-mod tests {
- use crate::core_arch::x86::*;
- use crate::mem::transmute;
- use crate::ptr::{addr_of, addr_of_mut};
- use stdarch_test::simd_test;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
- _mm_setr_ph(re, im, re, im, re, im, re, im)
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomile_sh(a, b);
+ assert_eq!(r, 1);
}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
- _mm256_setr_ph(
- re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
- )
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomilt_sh(a, b);
+ assert_eq!(r, 1);
}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
- _mm512_setr_ph(
- re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
- re, im, re, im, re, im, re, im, re, im,
- )
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_load_ph() {
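+        // _mm_load_ph is the aligned load; taking the address of an __m128h gives a suitably
+        // (16-byte) aligned pointer.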
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_load_ph(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_load_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_load_ph(addr_of!(a).cast());
+ assert_eq_m256h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_ph() {
- let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_load_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_load_ph(addr_of!(a).cast());
+ assert_eq_m512h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set_ph() {
- let r = _mm256_set_ph(
+ unsafe fn test_mm_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_load_sh(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let src = _mm_set_sh(2.);
+ let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
+ assert_eq_m128h(src, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_loadu_ph() {
+ let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
+ let r = _mm_loadu_ph(array.as_ptr());
+ let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_loadu_ph() {
+ let array = [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
+ ];
+ let r = _mm256_loadu_ph(array.as_ptr());
let e = _mm256_setr_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set_ph() {
- let r = _mm512_set_ph(
+ unsafe fn test_mm512_loadu_ph() {
+ let array = [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
- );
+ ];
+ let r = _mm512_loadu_ph(array.as_ptr());
let e = _mm512_setr_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_sh() {
- let r = _mm_set_sh(1.0);
- let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
+ unsafe fn test_mm_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_move_sh(a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set1_ph() {
- let r = _mm_set1_ph(1.0);
- let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ unsafe fn test_mm_mask_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let src = _mm_set_sh(10.0);
+ let r = _mm_mask_move_sh(src, 0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set1_ph() {
- let r = _mm256_set1_ph(1.0);
- let e = _mm256_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ unsafe fn test_mm_maskz_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_maskz_move_sh(0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_store_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_store_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- assert_eq_m256h(r, e);
+ let mut b = _mm256_setzero_ph();
+ _mm256_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m256h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set1_ph() {
- let r = _mm512_set1_ph(1.0);
- let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ unsafe fn test_mm512_store_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- assert_eq_m512h(r, e);
+ let mut b = _mm512_setzero_ph();
+ _mm512_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m512h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_setr_ph() {
- let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_sh(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_setr_ph() {
- let r = _mm256_setr_ph(
+ unsafe fn test_mm_mask_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_storeu_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut array = [0.0; 8];
+ _mm_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_storeu_ph() {
+ let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let e = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- assert_eq_m256h(r, e);
+ let mut array = [0.0; 16];
+ _mm256_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_setr_ph() {
- let r = _mm512_setr_ph(
+ unsafe fn test_mm512_storeu_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
- let e = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- assert_eq_m512h(r, e);
+ let mut array = [0.0; 32];
+ _mm512_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_setzero_ph() {
- let r = _mm_setzero_ph();
- let e = _mm_set1_ph(0.0);
+ unsafe fn test_mm_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_add_ph(a, b);
+ let e = _mm_set1_ph(9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_setzero_ph() {
- let r = _mm256_setzero_ph();
- let e = _mm256_set1_ph(0.0);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_setzero_ph() {
- let r = _mm512_setzero_ph();
- let e = _mm512_set1_ph(0.0);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_mask_add_ph() {
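+        // Mask bit i selects element i, and _mm_set_ph lists elements from highest to lowest, so
+        // 0b01010101 picks the even-numbered elements (every other value counting from the last argument).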
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_add_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castsi128_ph() {
- let a = _mm_set1_epi16(0x3c00);
- let r = _mm_castsi128_ph(a);
- let e = _mm_set1_ph(1.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_add_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castsi256_ph() {
- let a = _mm256_set1_epi16(0x3c00);
- let r = _mm256_castsi256_ph(a);
- let e = _mm256_set1_ph(1.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_add_ph(a, b);
+ let e = _mm256_set1_ph(17.0);
assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castsi512_ph() {
- let a = _mm512_set1_epi16(0x3c00);
- let r = _mm512_castsi512_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_si128() {
- let a = _mm_set1_ph(1.0);
- let r = _mm_castph_si128(a);
- let e = _mm_set1_epi16(0x3c00);
- assert_eq_m128i(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_si256() {
- let a = _mm256_set1_ph(1.0);
- let r = _mm256_castph_si256(a);
- let e = _mm256_set1_epi16(0x3c00);
- assert_eq_m256i(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_si512() {
- let a = _mm512_set1_ph(1.0);
- let r = _mm512_castph_si512(a);
- let e = _mm512_set1_epi16(0x3c00);
- assert_eq_m512i(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castps_ph() {
- let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
- let r = _mm_castps_ph(a);
- let e = _mm_set1_ph(1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_add_ph(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castps_ph() {
- let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castps_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm512_mask_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castps_ph() {
- let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castps_ph(a);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_maskz_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_ps() {
- let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
- let r = _mm_castph_ps(a);
- let e = _mm_set1_ps(1.0);
- assert_eq_m128(r, e);
+ unsafe fn test_mm512_add_round_ph() {
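+        // _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest-even with
+        // exceptions suppressed, the usual rounding argument for the *_round_* intrinsics.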
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_ps() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
- let r = _mm256_castph_ps(a);
- let e = _mm256_set1_ps(1.0);
- assert_eq_m256(r, e);
+ unsafe fn test_mm512_mask_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_ps() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
- let r = _mm512_castph_ps(a);
- let e = _mm512_set1_ps(1.0);
- assert_eq_m512(r, e);
+ unsafe fn test_mm512_maskz_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castpd_ph() {
- let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
- let r = _mm_castpd_ph(a);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castpd_ph() {
- let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castpd_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_mask_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castpd_ph() {
- let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castpd_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_maskz_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_pd() {
- let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
- let r = _mm_castph_pd(a);
- let e = _mm_set1_pd(1.0);
- assert_eq_m128d(r, e);
+ unsafe fn test_mm_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_sh(a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_pd() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
- let r = _mm256_castph_pd(a);
- let e = _mm256_set1_pd(1.0);
- assert_eq_m256d(r, e);
+ unsafe fn test_mm_mask_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_sh(src, 1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_pd() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
- let r = _mm512_castph_pd(a);
- let e = _mm512_set1_pd(1.0);
- assert_eq_m512d(r, e);
+ unsafe fn test_mm_maskz_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_add_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_add_sh(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph256_ph128() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- let r = _mm256_castph256_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_sub_ph(a, b);
+ let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph128() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
- );
- let r = _mm512_castph512_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph256() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_sub_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
);
- let r = _mm512_castph512_ph256(a);
- let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ let r = _mm256_sub_ph(a, b);
+ let e = _mm256_set_ph(
+ -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
+ 15.0,
);
assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_castph128_ph256(a);
- assert_eq_m128h(_mm256_castph256_ph128(r), a);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_castph128_ph512(a);
- assert_eq_m128h(_mm512_castph512_ph128(r), a);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let r = _mm512_castph256_ph512(a);
- assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_zextph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_zextph128_ph256(a);
- let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_zextph128_ph512(a);
- let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ unsafe fn test_mm512_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_ph(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ unsafe fn test_mm512_mask_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- let r = _mm512_zextph256_ph512(a);
- let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmp_round_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmp_round_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
- assert_eq!(r, 0);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmp_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmp_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
- assert_eq!(r, 0);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comieq_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comieq_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comige_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comigt_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mask_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comile_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_comile_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comilt_sh() {
+ unsafe fn test_mm_sub_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comilt_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comineq_sh() {
+ unsafe fn test_mm_mask_sub_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comineq_sh(a, b);
- assert_eq!(r, 1);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomieq_sh() {
+ unsafe fn test_mm_maskz_sub_round_sh() {
let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomieq_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomige_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomigt_sh(a, b);
- assert_eq!(r, 1);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomile_sh() {
+ unsafe fn test_mm_sub_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomile_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_sub_sh(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomilt_sh() {
+ unsafe fn test_mm_mask_sub_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomilt_sh(a, b);
- assert_eq!(r, 1);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_sh(src, 1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomineq_sh() {
+ unsafe fn test_mm_maskz_sub_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomineq_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_maskz_sub_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_sub_sh(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_load_ph() {
+ unsafe fn test_mm_mul_ph() {
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_load_ph(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_mul_ph(a, b);
+ let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_load_ph() {
+ unsafe fn test_mm_mask_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_mul_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mul_ph() {
let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let b = _mm256_load_ph(addr_of!(a).cast());
- assert_eq_m256h(a, b);
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_mul_ph(a, b);
+ let e = _mm256_set_ph(
+ 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
+ 30.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_load_ph() {
+ unsafe fn test_mm512_mul_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
- let b = _mm512_load_ph(addr_of!(a).cast());
- assert_eq_m512h(a, b);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_mul_ph(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_load_sh(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ unsafe fn test_mm512_mask_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_load_sh() {
- let a = _mm_set_sh(1.0);
- let src = _mm_set_sh(2.);
- let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
- assert_eq_m128h(src, b);
+ unsafe fn test_mm512_maskz_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
- assert_eq_m128h(_mm_setzero_ph(), b);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_loadu_ph() {
- let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
- let r = _mm_loadu_ph(array.as_ptr());
- let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_loadu_ph() {
- let array = [
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- ];
- let r = _mm256_loadu_ph(array.as_ptr());
- let e = _mm256_setr_ph(
+ unsafe fn test_mm512_mul_round_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
);
- assert_eq_m256h(r, e);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_loadu_ph() {
- let array = [
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- ];
- let r = _mm512_loadu_ph(array.as_ptr());
- let e = _mm512_setr_ph(
+ unsafe fn test_mm512_mask_mul_round_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
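+        // Lanes whose mask bit is clear take their value from src rather than the product.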
+ let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_move_sh(a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_maskz_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let src = _mm_set_sh(10.0);
- let r = _mm_mask_move_sh(src, 0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
+ unsafe fn test_mm_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
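+        // Only the lowest f16 lane is multiplied; the upper lanes are copied from a
+        // (all zero here, since _mm_set_sh zeroes them).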
+ let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_maskz_move_sh(0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
+ unsafe fn test_mm_mask_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_store_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut b = _mm_setzero_ph();
- _mm_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_store_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- let mut b = _mm256_setzero_ph();
- _mm256_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m256h(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_store_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let mut b = _mm512_setzero_ph();
- _mm512_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m512h(a, b);
+ unsafe fn test_mm_maskz_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_store_sh() {
+ unsafe fn test_mm_mul_sh() {
let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_store_sh(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_mul_sh(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_store_sh() {
+ unsafe fn test_mm_mask_mul_sh() {
let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
- assert_eq_m128h(_mm_setzero_ph(), b);
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
- assert_eq_m128h(a, b);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_storeu_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut array = [0.0; 8];
- _mm_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_storeu_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let mut array = [0.0; 16];
- _mm256_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_sh(src, 1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_storeu_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let mut array = [0.0; 32];
- _mm512_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+ unsafe fn test_mm_maskz_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_mul_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_mul_sh(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_add_ph(a, b);
- let e = _mm_set1_ph(9.0);
+ unsafe fn test_mm_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_div_ph(a, b);
+ let e = _mm_set1_ph(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_add_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ unsafe fn test_mm_mask_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
+ let r = _mm_mask_div_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_add_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
+ unsafe fn test_mm_maskz_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_maskz_div_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_add_ph(a, b);
- let e = _mm256_set1_ph(17.0);
+ unsafe fn test_mm256_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_div_ph(a, b);
+ let e = _mm256_set1_ph(0.5);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
+ unsafe fn test_mm256_mask_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0,
);
- let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
+ let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
+ unsafe fn test_mm256_maskz_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_add_ph(a, b);
- let e = _mm512_set1_ph(33.0);
+ unsafe fn test_mm512_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_ph(a, b);
+ let e = _mm512_set1_ph(0.5);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
+ unsafe fn test_mm512_mask_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
);
- let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
+ let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_maskz_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(33.0);
+ unsafe fn test_mm512_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(0.5);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
+ unsafe fn test_mm512_mask_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
);
- let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_round_sh() {
+ unsafe fn test_mm_div_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_round_sh() {
+ unsafe fn test_mm_mask_div_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
let e = _mm_set_sh(4.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 1, a, b,
);
- let e = _mm_set_sh(3.0);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_round_sh() {
+ unsafe fn test_mm_maskz_div_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
let e = _mm_set_sh(0.0);
assert_eq_m128h(r, e);
let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(3.0);
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_sh() {
+ unsafe fn test_mm_div_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_add_sh(a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_div_sh(a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_sh() {
+ unsafe fn test_mm_mask_div_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_sh(src, 0, a, b);
+ let r = _mm_mask_div_sh(src, 0, a, b);
let e = _mm_set_sh(4.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_add_sh(src, 1, a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_mask_div_sh(src, 1, a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_sh() {
+ unsafe fn test_mm_maskz_div_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_maskz_add_sh(0, a, b);
+ let r = _mm_maskz_div_sh(0, a, b);
let e = _mm_set_sh(0.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_add_sh(1, a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_maskz_div_sh(1, a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_sub_ph(a, b);
- let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ unsafe fn test_mm_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
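+        // Each pair of f16 lanes holds one complex number (re, im). Both inputs
+        // broadcast i, and i * i = -1, so every complex lane of the product is -1 + 0i.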
+ let r = _mm_mul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ unsafe fn test_mm_mask_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_mul_pch(src, 0b0101, a, b);
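+        // One mask bit per complex lane (four lanes in a __m128h): clear bits keep
+        // both halves of the corresponding pair from src.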
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_sub_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ unsafe fn test_mm_maskz_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_mul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_sub_ph(a, b);
- let e = _mm256_set_ph(
- -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
- 15.0,
- );
+ unsafe fn test_mm256_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_mul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ unsafe fn test_mm256_mask_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ unsafe fn test_mm256_maskz_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_mul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_sub_ph(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
- );
+ unsafe fn test_mm512_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ unsafe fn test_mm512_maskz_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
- );
+ unsafe fn test_mm512_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
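+        // Only the lowest complex pair (lanes 0 and 1) is multiplied: i * i = -1.
+        // The remaining six lanes of the result are copied from a.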
+ let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
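+        // With the mask bit clear, the low complex pair comes from src; lanes 2..7
+        // are still copied from a.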
+ let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(-1.0);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_maskz_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(-1.0);
+ _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_sh(a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_mul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_sub_sh(src, 1, a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_mask_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_mul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_sub_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_sub_sh(1, a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_maskz_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_mul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_mul_ph(a, b);
- let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ unsafe fn test_mm_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
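+        // _mm_fmul_pch computes the same packed complex product as _mm_mul_pch
+        // (both lower to VFMULCPH), so the expectation mirrors the mul_pch test.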
+ let r = _mm_fmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ unsafe fn test_mm_mask_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_mul_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ unsafe fn test_mm_maskz_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_fmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_mul_ph(a, b);
- let e = _mm256_set_ph(
- 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
- 30.0, 16.0,
- );
+ unsafe fn test_mm256_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_fmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ unsafe fn test_mm256_mask_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ unsafe fn test_mm256_maskz_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_mul_ph(a, b);
- let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
- );
+ unsafe fn test_mm512_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ unsafe fn test_mm512_maskz_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
- );
+ unsafe fn test_mm512_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(2.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(2.0);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_sh(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_maskz_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r =
+ _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_mul_sh(src, 1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_mul_sh(0, a, b);
- let e = _mm_set_sh(0.0);
+ unsafe fn test_mm_mask_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_mul_sh(1, a, b);
- let e = _mm_set_sh(2.0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_fmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let r = _mm_div_ph(a, b);
- let e = _mm_set1_ph(0.5);
+ unsafe fn test_mm_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
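+        // cmul multiplies a by the complex conjugate of b: i * conj(-i) = i * i = -1.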
+ let r = _mm_cmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
- let r = _mm_mask_div_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
+ unsafe fn test_mm_mask_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let r = _mm_maskz_div_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ unsafe fn test_mm_maskz_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_cmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let r = _mm256_div_ph(a, b);
- let e = _mm256_set1_ph(0.5);
+ unsafe fn test_mm256_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_cmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let src = _mm256_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0,
+ unsafe fn test_mm256_mask_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ unsafe fn test_mm256_maskz_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_ph(a, b);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
+ unsafe fn test_mm512_mask_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ unsafe fn test_mm512_maskz_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
+ unsafe fn test_mm512_mask_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(0.5);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_mask_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_sh(a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_maskz_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_cmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_div_sh(src, 1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_div_sh(0, a, b);
- let e = _mm_set_sh(0.0);
+ unsafe fn test_mm_mask_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_div_sh(1, a, b);
- let e = _mm_set_sh(0.5);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mul_pch() {
+ unsafe fn test_mm_fcmul_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_mul_pch(a, b);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_fcmul_pch(a, b);
let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_mul_pch() {
+ unsafe fn test_mm_mask_fcmul_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_mul_pch(src, 0b0101, a, b);
+ let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_mul_pch() {
+ unsafe fn test_mm_maskz_fcmul_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_mul_pch(0b0101, a, b);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_fcmul_pch(0b0101, a, b);
let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mul_pch() {
+ unsafe fn test_mm256_fcmul_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_mul_pch(a, b);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_fcmul_pch(a, b);
let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_mul_pch() {
+ unsafe fn test_mm256_mask_fcmul_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
let src = _mm256_setr_ph(
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
+ let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
let e = _mm256_setr_ph(
-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
@@ -9218,10 +11054,10 @@ mod tests {
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_mul_pch() {
+ unsafe fn test_mm256_maskz_fcmul_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_mul_pch(0b01010101, a, b);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
let e = _mm256_setr_ph(
-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
@@ -9229,24 +11065,24 @@ mod tests {
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_pch() {
+ unsafe fn test_mm512_fcmul_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_mul_pch(a, b);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_pch(a, b);
let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_pch() {
+ unsafe fn test_mm512_mask_fcmul_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
let src = _mm512_setr_ph(
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
32.0, 33.0,
);
- let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
+ let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
let e = _mm512_setr_ph(
-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
-1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
@@ -9256,10 +11092,10 @@ mod tests {
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_pch() {
+ unsafe fn test_mm512_maskz_fcmul_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
let e = _mm512_setr_ph(
-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
@@ -9268,24 +11104,24 @@ mod tests {
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_round_pch() {
+ unsafe fn test_mm512_fcmul_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_round_pch() {
+ unsafe fn test_mm512_mask_fcmul_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
let src = _mm512_setr_ph(
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
32.0, 33.0,
);
- let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
0b0101010101010101,
a,
@@ -9300,10 +11136,10 @@ mod tests {
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_round_pch() {
+ unsafe fn test_mm512_maskz_fcmul_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b0101010101010101,
a,
b,
@@ -9316,3088 +11152,3321 @@ mod tests {
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_round_sch() {
+ unsafe fn test_mm_fcmul_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_sch(a, b);
let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_round_sch() {
+ unsafe fn test_mm_mask_fcmul_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fcmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_fcmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_abs_ph() {
+ let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
+ let r = _mm_abs_ph(a);
+ let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_abs_ph() {
+ let a = _mm256_set_ph(
+ -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
+ -14.0,
+ );
+ let r = _mm256_abs_ph(a);
+ let e = _mm256_set_ph(
+ 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_abs_ph() {
+ let a = _mm512_set_ph(
+ -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
+ -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
+ 27.0, -28.0, 29.0, -30.0,
+ );
+ let r = _mm512_abs_ph(a);
+ let e = _mm512_set_ph(
+ 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
+ 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
+ 29.0, 30.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_conj_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_conj_pch(a);
+ let e = _mm_set1_pch(0.0, -1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_conj_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_conj_pch(src, 0b0101, a);
+ let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_conj_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_conj_pch(0b0101, a);
+ let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_conj_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_conj_pch(a);
+ let e = _mm256_set1_pch(0.0, -1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_conj_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ );
+ let r = _mm256_mask_conj_pch(src, 0b01010101, a);
+ let e = _mm256_setr_ph(
+ 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r =
- _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_conj_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_conj_pch(0b01010101, a);
+ let e = _mm256_setr_ph(
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_mul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_conj_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_conj_pch(a);
+ let e = _mm512_set1_pch(0.0, -1.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_mul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_mask_conj_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
+ let e = _mm512_setr_ph(
+ 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
+ 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
+ 33.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_maskz_mul_sch(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm512_maskz_conj_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
+ let e = _mm512_setr_ph(
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_fmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_fmadd_pch(a, b, c);
+ let e = _mm_set1_pch(-2.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmul_pch() {
+ unsafe fn test_mm_mask_fmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_fmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmul_pch() {
+ unsafe fn test_mm_mask3_fmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmul_pch() {
+ unsafe fn test_mm_maskz_fmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_fmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmul_pch() {
+ unsafe fn test_mm256_fmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_fmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_fmadd_pch(a, b, c);
+ let e = _mm256_set1_pch(-2.0, 3.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmul_pch() {
+ unsafe fn test_mm256_mask_fmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
+ let e = _mm256_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
);
- let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmul_pch() {
+ unsafe fn test_mm256_maskz_fmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmul_pch() {
+ unsafe fn test_mm512_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_fmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_fmadd_pch(a, b, c);
+ let e = _mm512_set1_pch(-2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmul_pch() {
+ unsafe fn test_mm512_mask_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
);
- let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmul_pch() {
+ unsafe fn test_mm512_maskz_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmul_round_pch() {
+ unsafe fn test_mm512_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r =
+ _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pch(-2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmul_round_pch() {
+ unsafe fn test_mm512_mask_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
0b0101010101010101,
+ b,
+ c,
+ );
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b0101010101010101,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmul_round_pch() {
+ unsafe fn test_mm512_maskz_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b0101010101010101,
a,
b,
+ c,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmul_round_sch() {
+ unsafe fn test_mm_fmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fmadd_sch(a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_fmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fmadd_sch(a, 0, b, c);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_sch(a, 1, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmul_round_sch() {
+ unsafe fn test_mm_mask3_fmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmadd_sch(a, b, c, 0);
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_sch(a, b, c, 1);
+ let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmul_round_sch() {
+ unsafe fn test_mm_maskz_fmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r =
- _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fmadd_sch(0, a, b, c);
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_sch(1, a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmul_sch() {
+ unsafe fn test_mm_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_fmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmul_sch() {
+ unsafe fn test_mm_mask_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmul_sch() {
+ unsafe fn test_mm_mask3_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_maskz_fmul_sch(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_cmul_pch() {
+ unsafe fn test_mm_fcmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_cmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_fcmadd_pch(a, b, c);
+ let e = _mm_set1_pch(2.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_cmul_pch() {
+ unsafe fn test_mm_mask_fcmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_cmul_pch() {
+ unsafe fn test_mm_mask3_fcmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_maskz_cmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_cmul_pch() {
+ unsafe fn test_mm_maskz_fcmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_cmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_fcmadd_pch(a, b, c);
+ let e = _mm256_set1_pch(2.0, 3.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_cmul_pch() {
+ unsafe fn test_mm256_mask_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
+ let e = _mm256_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
);
- let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fcmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_cmul_pch() {
+ unsafe fn test_mm256_maskz_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_cmul_pch() {
+ unsafe fn test_mm512_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_cmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_fcmadd_pch(a, b, c);
+ let e = _mm512_set1_pch(2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_cmul_pch() {
+ unsafe fn test_mm512_mask_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
+ 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
);
- let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fcmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
+ 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_cmul_pch() {
+ unsafe fn test_mm512_maskz_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
+ 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_cmul_round_pch() {
+ unsafe fn test_mm512_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r =
+ _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pch(2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_cmul_round_pch() {
+ unsafe fn test_mm512_mask_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
0b0101010101010101,
+ b,
+ c,
+ );
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
+ 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fcmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b0101010101010101,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
+ 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_cmul_round_pch() {
+ unsafe fn test_mm512_maskz_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b0101010101010101,
a,
b,
+ c,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
+ 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmul_sch() {
+ unsafe fn test_mm_fcmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_cmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fcmadd_sch(a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmul_sch() {
+ unsafe fn test_mm_mask_fcmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_cmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fcmadd_sch(a, 0, b, c);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fcmadd_sch(a, 1, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_cmul_sch() {
+ unsafe fn test_mm_mask3_fcmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_maskz_cmul_sch(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
+ let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fcmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fcmadd_sch(0, a, b, c);
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fcmadd_sch(1, a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmul_round_sch() {
+ unsafe fn test_mm_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmul_round_sch() {
+ unsafe fn test_mm_mask_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_cmul_round_sch() {
+ unsafe fn test_mm_mask3_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r =
- _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fcmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_fcmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fmadd_ph(a, b, c);
+ let e = _mm_set1_ph(5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ unsafe fn test_mm_mask_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_maskz_fcmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ unsafe fn test_mm_mask3_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_fcmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_maskz_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmadd_ph(a, b, c);
+ let e = _mm256_set1_ph(5.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ unsafe fn test_mm256_mask_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
- let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_fcmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmadd_ph(a, b, c);
+ let e = _mm512_set1_ph(5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ unsafe fn test_mm512_mask_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
+ 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm512_mask3_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
+ 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_maskz_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
+ 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b0101010101010101,
+ unsafe fn test_mm512_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(5.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
+ 0b01010101010101010101010101010101,
b,
+ c,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ let e = _mm512_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
+ 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_mask3_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
+ 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_fcmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fcmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_maskz_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ c,
+ );
+ let e = _mm512_set_ph(
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
+ 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_maskz_fcmul_sch(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmadd_sh(a, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmadd_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_mask_fmadd_sh(a, 1, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r =
- _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask3_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmadd_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_abs_ph() {
- let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
- let r = _mm_abs_ph(a);
- let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
+ let r = _mm_mask3_fmadd_sh(a, b, c, 1);
+ let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_abs_ph() {
- let a = _mm256_set_ph(
- -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
- -14.0,
- );
- let r = _mm256_abs_ph(a);
- let e = _mm256_set_ph(
- 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
- );
- assert_eq_m256h(r, e);
- }
-
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_abs_ph() {
- let a = _mm512_set_ph(
- -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
- -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
- 27.0, -28.0, 29.0, -30.0,
- );
- let r = _mm512_abs_ph(a);
- let e = _mm512_set_ph(
- 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
- 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
- 29.0, 30.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let r = _mm_conj_pch(a);
- let e = _mm_set1_pch(0.0, -1.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_conj_pch(src, 0b0101, a);
- let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
+ unsafe fn test_mm_maskz_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmadd_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_conj_pch(0b0101, a);
- let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
+ let r = _mm_maskz_fmadd_sh(1, a, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_conj_pch(a);
- let e = _mm256_set1_pch(0.0, -1.0);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- );
- let r = _mm256_mask_conj_pch(src, 0b01010101, a);
- let e = _mm256_setr_ph(
- 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
- );
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_conj_pch(0b01010101, a);
- let e = _mm256_setr_ph(
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
- );
- assert_eq_m256h(r, e);
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_conj_pch(a);
- let e = _mm512_set1_pch(0.0, -1.0);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_mask_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ unsafe fn test_mm_mask3_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
);
- let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
- let e = _mm512_setr_ph(
- 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
- 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
- 33.0,
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
- let e = _mm512_setr_ph(
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ unsafe fn test_mm_maskz_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_fmadd_pch(a, b, c);
- let e = _mm_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fmsub_ph(a, b, c);
+ let e = _mm_set1_ph(-1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
+ unsafe fn test_mm_mask_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
+ unsafe fn test_mm_mask3_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
+ unsafe fn test_mm_maskz_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_fmadd_pch(a, b, c);
- let e = _mm256_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm256_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmsub_ph(a, b, c);
+ let e = _mm256_set1_ph(-1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm256_mask_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm256_mask3_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_fmadd_pch(a, b, c);
- let e = _mm512_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm512_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmsub_ph(a, b, c);
+ let e = _mm512_set1_ph(-1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm512_mask_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm512_mask3_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm512_maskz_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r =
- _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_pch(-2.0, 3.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(-1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
b,
c,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ let e = _mm512_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask3_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ let e = _mm512_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_maskz_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
c,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fmadd_sch(a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmsub_sh(a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fmadd_sch(a, 0, b, c);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmsub_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_sch(a, 1, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_mask_fmsub_sh(a, 1, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fmadd_sch(a, b, c, 0);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ unsafe fn test_mm_mask3_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmsub_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_sch(a, b, c, 1);
- let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmsub_sh(a, b, c, 1);
+ let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fmadd_sch(0, a, b, c);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_maskz_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmsub_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_sch(1, a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_maskz_fmsub_sh(1, a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask3_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_maskz_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_fcmadd_pch(a, b, c);
- let e = _mm_set1_pch(2.0, 3.0);
+ unsafe fn test_mm_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fnmadd_ph(a, b, c);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
+ unsafe fn test_mm_mask_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
+ unsafe fn test_mm_mask3_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
+ unsafe fn test_mm_maskz_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_fcmadd_pch(a, b, c);
- let e = _mm256_set1_pch(2.0, 3.0);
+ unsafe fn test_mm256_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fnmadd_ph(a, b, c);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm256_mask_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm256_mask3_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_fcmadd_pch(a, b, c);
- let e = _mm512_set1_pch(2.0, 3.0);
+ unsafe fn test_mm512_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fnmadd_ph(a, b, c);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
- 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm512_mask_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
- 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm512_mask3_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
- 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm512_maskz_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
+ unsafe fn test_mm512_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
let r =
- _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_pch(2.0, 3.0);
+ _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
b,
c,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
- 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask3_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
- 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ let e = _mm512_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_maskz_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
c,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
- 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fcmadd_sch(a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmadd_sh(a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fcmadd_sch(a, 0, b, c);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fnmadd_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fcmadd_sch(a, 1, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_mask_fnmadd_sh(a, 1, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ unsafe fn test_mm_mask3_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
- let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
+ let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fcmadd_sch(0, a, b, c);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_maskz_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmadd_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fcmadd_sch(1, a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_maskz_fnmadd_sh(1, a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask3_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_maskz_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmadd_ph() {
+ unsafe fn test_mm_fnmsub_ph() {
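+        // fnmsub computes -(a * b) - c, so every lane is -(1.0 * 2.0) - 3.0 == -5.0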
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_fmadd_ph(a, b, c);
- let e = _mm_set1_ph(5.0);
+ let r = _mm_fnmsub_ph(a, b, c);
+ let e = _mm_set1_ph(-5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmadd_ph() {
+ unsafe fn test_mm_mask_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
+ let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmadd_ph() {
+ unsafe fn test_mm_mask3_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
+ let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmadd_ph() {
+ unsafe fn test_mm_maskz_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
+ let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmadd_ph() {
+ unsafe fn test_mm256_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmadd_ph(a, b, c);
- let e = _mm256_set1_ph(5.0);
+ let r = _mm256_fnmsub_ph(a, b, c);
+ let e = _mm256_set1_ph(-5.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmadd_ph() {
+ unsafe fn test_mm256_mask_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
let e = _mm256_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmadd_ph() {
+ unsafe fn test_mm256_mask3_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
let e = _mm256_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmadd_ph() {
+ unsafe fn test_mm256_maskz_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
let e = _mm256_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_ph() {
+ unsafe fn test_mm512_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmadd_ph(a, b, c);
- let e = _mm512_set1_ph(5.0);
+ let r = _mm512_fnmsub_ph(a, b, c);
+ let e = _mm512_set1_ph(-5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_ph() {
+ unsafe fn test_mm512_mask_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
let e = _mm512_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
- 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_ph() {
+ unsafe fn test_mm512_mask3_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
let e = _mm512_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
- 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_ph() {
+ unsafe fn test_mm512_maskz_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
let e = _mm512_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
- 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_round_ph() {
+ unsafe fn test_mm512_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(5.0);
+ let r =
+ _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(-5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_round_ph() {
+ unsafe fn test_mm512_mask_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
0b01010101010101010101010101010101,
b,
c,
);
let e = _mm512_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
- 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_round_ph() {
+ unsafe fn test_mm512_mask3_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
0b01010101010101010101010101010101,
);
let e = _mm512_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
- 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_round_ph() {
+ unsafe fn test_mm512_maskz_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
c,
);
let e = _mm512_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
- 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_sh() {
+ unsafe fn test_mm_fnmsub_sh() {
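+        // scalar fnmsub: lane 0 gets -(a[0] * b[0]) - c[0] == -5.0; the upper lanes are copied from a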
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmadd_sh(a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_fnmsub_sh(a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_sh() {
+ unsafe fn test_mm_mask_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmadd_sh(a, 0, b, c);
+ let r = _mm_mask_fnmsub_sh(a, 0, b, c);
let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_sh(a, 1, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_mask_fnmsub_sh(a, 1, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_sh() {
+ unsafe fn test_mm_mask3_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmadd_sh(a, b, c, 0);
+ let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_sh(a, b, c, 1);
- let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
+ let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_sh() {
+ unsafe fn test_mm_maskz_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmadd_sh(0, a, b, c);
+ let r = _mm_maskz_fnmsub_sh(0, a, b, c);
let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_sh(1, a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_maskz_fnmsub_sh(1, a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_round_sh() {
+ unsafe fn test_mm_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_round_sh() {
+ unsafe fn test_mm_mask_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_round_sh() {
+ unsafe fn test_mm_mask3_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_round_sh() {
+ unsafe fn test_mm_maskz_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmsub_ph() {
+ unsafe fn test_mm_fmaddsub_ph() {
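+        // fmaddsub alternates per lane: even lanes get a * b - c (== -1.0), odd lanes get a * b + c (== 5.0)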
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_fmsub_ph(a, b, c);
- let e = _mm_set1_ph(-1.0);
+ let r = _mm_fmaddsub_ph(a, b, c);
+ let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmsub_ph() {
+ unsafe fn test_mm_mask_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
+ let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
+ let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmsub_ph() {
+ unsafe fn test_mm_mask3_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
+ let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
+ let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmsub_ph() {
+ unsafe fn test_mm_maskz_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
+ let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
+ let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmsub_ph() {
+ unsafe fn test_mm256_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmsub_ph(a, b, c);
- let e = _mm256_set1_ph(-1.0);
+ let r = _mm256_fmaddsub_ph(a, b, c);
+ let e = _mm256_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmsub_ph() {
+ unsafe fn test_mm256_mask_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
let e = _mm256_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmsub_ph() {
+ unsafe fn test_mm256_mask3_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
let e = _mm256_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmsub_ph() {
+ unsafe fn test_mm256_maskz_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
let e = _mm256_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsub_ph() {
+ unsafe fn test_mm512_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsub_ph(a, b, c);
- let e = _mm512_set1_ph(-1.0);
+ let r = _mm512_fmaddsub_ph(a, b, c);
+ let e = _mm512_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsub_ph() {
+ unsafe fn test_mm512_mask_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
let e = _mm512_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsub_ph() {
+ unsafe fn test_mm512_mask3_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
let e = _mm512_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsub_ph() {
+ unsafe fn test_mm512_maskz_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
let e = _mm512_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsub_round_ph() {
+ unsafe fn test_mm512_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(-1.0);
+ let r =
+ _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsub_round_ph() {
+ unsafe fn test_mm512_mask_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
b,
c,
);
let e = _mm512_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsub_round_ph() {
+ unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsub_round_ph() {
+ unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00110011001100110011001100110011,
a,
b,
c,
);
let e = _mm512_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmsub_sh(a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmsub_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fmsub_sh(a, 1, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmsub_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fmsub_sh(a, b, c, 1);
- let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmsub_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fmsub_sh(1, a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
- );
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
- );
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
- );
- let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
- );
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
- );
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fnmadd_ph() {
+ unsafe fn test_mm_fmsubadd_ph() {
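+        // fmsubadd is the opposite interleave: even lanes get a * b + c (== 5.0), odd lanes get a * b - c (== -1.0)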
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_fnmadd_ph(a, b, c);
- let e = _mm_set1_ph(1.0);
+ let r = _mm_fmsubadd_ph(a, b, c);
+ let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fnmadd_ph() {
+ unsafe fn test_mm_mask_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
+ let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fnmadd_ph() {
+ unsafe fn test_mm_mask3_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
+ let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
+ let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fnmadd_ph() {
+ unsafe fn test_mm_maskz_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
+ let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fnmadd_ph() {
+ unsafe fn test_mm256_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fnmadd_ph(a, b, c);
- let e = _mm256_set1_ph(1.0);
+ let r = _mm256_fmsubadd_ph(a, b, c);
+ let e = _mm256_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fnmadd_ph() {
+ unsafe fn test_mm256_mask_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
let e = _mm256_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fnmadd_ph() {
+ unsafe fn test_mm256_mask3_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
let e = _mm256_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fnmadd_ph() {
+ unsafe fn test_mm256_maskz_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
let e = _mm256_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmadd_ph() {
+ unsafe fn test_mm512_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fnmadd_ph(a, b, c);
- let e = _mm512_set1_ph(1.0);
+ let r = _mm512_fmsubadd_ph(a, b, c);
+ let e = _mm512_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmadd_ph() {
+ unsafe fn test_mm512_mask_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmadd_ph() {
+ unsafe fn test_mm512_mask3_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmadd_ph() {
+ unsafe fn test_mm512_maskz_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmadd_round_ph() {
+ unsafe fn test_mm512_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
let r =
- _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(1.0);
+ _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmadd_round_ph() {
+ unsafe fn test_mm512_mask_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
b,
c,
);
let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmadd_round_ph() {
+ unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmadd_round_ph() {
+ unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00110011001100110011001100110011,
a,
b,
c,
);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmadd_sh(a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_rcp_ph() {
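+        // rcp computes an approximate reciprocal per lane; for 2.0 the expected result is exactly 0.5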
+ let a = _mm_set1_ph(2.0);
+ let r = _mm_rcp_ph(a);
+ let e = _mm_set1_ph(0.5);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmadd_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmadd_sh(a, 1, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_rcp_ph() {
+ let a = _mm_set1_ph(2.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_rcp_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
- let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_rcp_ph() {
+ let a = _mm_set1_ph(2.0);
+ let r = _mm_maskz_rcp_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmadd_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmadd_sh(1, a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_rcp_ph(a);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_rcp_ph(a);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
+ unsafe fn test_mm512_mask_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
+ 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
- );
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
+ unsafe fn test_mm512_maskz_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
- let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_rcp_sh() {
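+        // scalar form: lane 0 gets the approximate reciprocal of b[0] (1.0 / 2.0 == 0.5); the upper lanes are copied from a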
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_rcp_sh(a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
- );
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_mask_rcp_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_rcp_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_mask_rcp_sh(src, 1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fnmsub_ph(a, b, c);
- let e = _mm_set1_ph(-5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_rcp_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_rcp_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_rcp_sh(1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
+ unsafe fn test_mm_rsqrt_ph() {
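+        // rsqrt computes an approximate reciprocal square root per lane; 1.0 / sqrt(4.0) == 0.5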
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_rsqrt_ph(a);
+ let e = _mm_set1_ph(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
+ unsafe fn test_mm_mask_rsqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
+ unsafe fn test_mm_maskz_rsqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_maskz_rsqrt_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fnmsub_ph(a, b, c);
- let e = _mm256_set1_ph(-5.0);
+ unsafe fn test_mm256_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_rsqrt_ph(a);
+ let e = _mm256_set1_ph(0.5);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
+ unsafe fn test_mm256_mask_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
+ unsafe fn test_mm256_maskz_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
let e = _mm256_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
- let e = _mm256_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
- );
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_rsqrt_ph(a);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fnmsub_ph(a, b, c);
- let e = _mm512_set1_ph(-5.0);
+ unsafe fn test_mm512_mask_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
+ 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
+ unsafe fn test_mm512_maskz_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_rsqrt_sh() {
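+        // scalar form: lane 0 gets the approximate reciprocal square root of b[0] (1.0 / sqrt(4.0) == 0.5); the upper lanes are copied from a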
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_rsqrt_sh(a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_rsqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_rsqrt_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_rsqrt_sh(src, 1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_rsqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_maskz_rsqrt_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_rsqrt_sh(1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_sqrt_ph() {
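+        // sqrt is exact here: sqrt(4.0) == 2.0 in every lane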
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_sqrt_ph(a);
+ let e = _mm_set1_ph(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_sqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_sqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_maskz_sqrt_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_sqrt_ph(a);
+ let e = _mm256_set1_ph(2.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
- assert_eq_m512h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
- let e = _mm512_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
- );
+ unsafe fn test_mm512_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_sqrt_ph(a);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ unsafe fn test_mm512_mask_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(-5.0);
+ unsafe fn test_mm512_maskz_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a,
- 0b01010101010101010101010101010101,
- b,
- c,
- );
- let e = _mm512_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- );
+ unsafe fn test_mm512_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a,
- b,
- c,
+ unsafe fn test_mm512_mask_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
0b01010101010101010101010101010101,
+ a,
);
let e = _mm512_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
- b,
- c,
);
let e = _mm512_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmsub_sh(a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmsub_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmsub_sh(a, 1, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_sqrt_sh(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ unsafe fn test_mm_mask_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_sqrt_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
- let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_sqrt_sh(src, 1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmsub_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmsub_sh(1, a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_maskz_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_maskz_sqrt_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_maskz_sqrt_sh(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
- );
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
+ unsafe fn test_mm_mask_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
+ let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
- );
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
- );
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_maskz_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r =
+ _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fmaddsub_ph(a, b, c);
- let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
+ let r =
+ _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
- let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
+ unsafe fn test_mm_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_max_ph(a, b);
+ let e = _mm_set1_ph(2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
- let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
+ unsafe fn test_mm_mask_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let src = _mm_set1_ph(3.0);
+ let r = _mm_mask_max_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
- let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
+ unsafe fn test_mm_maskz_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_maskz_max_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmaddsub_ph(a, b, c);
- let e = _mm256_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- );
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
- let e = _mm256_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- );
+ unsafe fn test_mm256_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_max_ph(a, b);
+ let e = _mm256_set1_ph(2.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
+ unsafe fn test_mm256_mask_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let src = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
+ unsafe fn test_mm256_maskz_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m256h(r, e);
}
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmaddsub_ph(a, b, c);
- let e = _mm512_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
- let e = _mm512_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
- let e = _mm512_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
- let e = _mm512_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
- );
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_max_ph(a, b);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ unsafe fn test_mm512_mask_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
+ 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a,
- 0b00110011001100110011001100110011,
- b,
- c,
- );
+ unsafe fn test_mm512_maskz_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(2.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
a,
b,
- c,
- 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
+ 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b00110011001100110011001100110011,
+ unsafe fn test_mm512_maskz_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
- c,
);
let e = _mm512_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fmsubadd_ph(a, b, c);
- let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_max_sh(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
- let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_max_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_max_sh(src, 1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_max_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_max_sh(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r =
+ _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
- let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
+ unsafe fn test_mm_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_min_ph(a, b);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
- let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
+ unsafe fn test_mm_mask_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let src = _mm_set1_ph(3.0);
+ let r = _mm_mask_min_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmsubadd_ph(a, b, c);
- let e = _mm256_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_maskz_min_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
- let e = _mm256_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- );
+ unsafe fn test_mm256_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_min_ph(a, b);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
+ unsafe fn test_mm256_mask_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let src = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
+ unsafe fn test_mm256_maskz_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsubadd_ph(a, b, c);
- let e = _mm512_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
- let e = _mm512_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- );
+ unsafe fn test_mm512_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_min_ph(a, b);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
+ unsafe fn test_mm512_mask_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
+ unsafe fn test_mm512_maskz_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- );
+ unsafe fn test_mm512_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
a,
- 0b00110011001100110011001100110011,
b,
- c,
);
let e = _mm512_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
- c,
- 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b00110011001100110011001100110011,
- a,
- b,
- c,
+ unsafe fn test_mm_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_min_sh(a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_min_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_min_sh(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_min_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_min_sh(1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let e = _mm512_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r =
+ _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
}
}
From 487210840bf3ef502b130228a10dcfb57246660e Mon Sep 17 00:00:00 2001
From: sayantn
Date: Mon, 15 Jul 2024 16:01:06 +0530
Subject: [PATCH 06/11] AVX512FP16 Part 5: FP-Support
`getexp`, `getmant`, `roundscale`, `scalef`, `reduce`
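A rough scalar sketch of what these operations compute (plain f32 with made-up helper names, purely illustrative; the actual intrinsics work on packed/scalar f16, take imm8 normalization/sign/rounding controls, and honour the selected rounding mode rather than `round()`):

    /// getexp: exponent of `x` as a float, i.e. floor(log2(|x|)).
    fn getexp(x: f32) -> f32 {
        x.abs().log2().floor()
    }

    /// getmant: mantissa of `x` normalized into [1, 2), sign dropped
    /// (the _MM_MANT_NORM_1_2 / sign-zero defaults).
    fn getmant(x: f32) -> f32 {
        x.abs() / getexp(x).exp2()
    }

    /// scalef: a * 2^floor(b).
    fn scalef(a: f32, b: f32) -> f32 {
        a * b.floor().exp2()
    }

    /// roundscale: round `a` to `m` fractional bits, i.e. round(a * 2^m) / 2^m.
    fn roundscale(a: f32, m: u32) -> f32 {
        let scale = (m as f32).exp2();
        (a * scale).round() / scale
    }

    /// reduce: the part of `a` left over after roundscale.
    fn reduce(a: f32, m: u32) -> f32 {
        a - roundscale(a, m)
    }

    fn main() {
        assert_eq!(getexp(6.0), 2.0); // 6.0 = 1.5 * 2^2
        assert_eq!(getmant(6.0), 1.5);
        assert_eq!(scalef(3.0, 2.0), 12.0); // 3.0 * 2^2
        assert_eq!(roundscale(1.75, 0), 2.0);
        assert_eq!(reduce(1.75, 0), -0.25);
    }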
---
crates/core_arch/missing-x86.md | 90 -
crates/core_arch/src/x86/avx512fp16.rs | 10447 +++++++++++++++--------
2 files changed, 6866 insertions(+), 3671 deletions(-)
diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index c0b8aa1457..72fc8b840e 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -103,10 +103,6 @@
* [ ] [`_mm512_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
* [ ] [`_mm512_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
* [ ] [`_mm512_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask)
- * [ ] [`_mm512_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph)
- * [ ] [`_mm512_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph)
- * [ ] [`_mm512_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph)
- * [ ] [`_mm512_getmant_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph)
* [ ] [`_mm512_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
* [ ] [`_mm512_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
* [ ] [`_mm512_mask_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
@@ -155,16 +151,6 @@
* [ ] [`_mm512_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
* [ ] [`_mm512_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
* [ ] [`_mm512_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask)
- * [ ] [`_mm512_mask_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph)
- * [ ] [`_mm512_mask_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph)
- * [ ] [`_mm512_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph)
- * [ ] [`_mm512_mask_getmant_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph)
- * [ ] [`_mm512_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
- * [ ] [`_mm512_mask_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
- * [ ] [`_mm512_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph)
- * [ ] [`_mm512_mask_roundscale_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph)
- * [ ] [`_mm512_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph)
- * [ ] [`_mm512_mask_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
* [ ] [`_mm512_maskz_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
* [ ] [`_mm512_maskz_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
* [ ] [`_mm512_maskz_cvt_roundepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
@@ -209,28 +195,12 @@
* [ ] [`_mm512_maskz_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
* [ ] [`_mm512_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
* [ ] [`_mm512_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
- * [ ] [`_mm512_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph)
- * [ ] [`_mm512_maskz_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph)
- * [ ] [`_mm512_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph)
- * [ ] [`_mm512_maskz_getmant_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph)
- * [ ] [`_mm512_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
- * [ ] [`_mm512_maskz_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
- * [ ] [`_mm512_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph)
- * [ ] [`_mm512_maskz_roundscale_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph)
- * [ ] [`_mm512_maskz_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph)
- * [ ] [`_mm512_maskz_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph)
* [ ] [`_mm512_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
* [ ] [`_mm512_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
* [ ] [`_mm512_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph)
* [ ] [`_mm512_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph)
* [ ] [`_mm512_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph)
* [ ] [`_mm512_reduce_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph)
- * [ ] [`_mm512_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph)
- * [ ] [`_mm512_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph)
- * [ ] [`_mm512_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph)
- * [ ] [`_mm512_roundscale_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph)
- * [ ] [`_mm512_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph)
- * [ ] [`_mm512_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph)
* [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
* [ ] [`_mm_cvt_roundi32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
* [ ] [`_mm_cvt_roundi64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
@@ -268,10 +238,6 @@
* [ ] [`_mm_cvtu32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
* [ ] [`_mm_cvtu64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh)
* [ ] [`_mm_fpclass_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask)
- * [ ] [`_mm_getexp_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh)
- * [ ] [`_mm_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh)
- * [ ] [`_mm_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh)
- * [ ] [`_mm_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh)
* [ ] [`_mm_mask_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
* [ ] [`_mm_mask_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
* [ ] [`_mm_mask_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
@@ -281,16 +247,6 @@
* [ ] [`_mm_mask_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
* [ ] [`_mm_mask_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
* [ ] [`_mm_mask_fpclass_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask)
- * [ ] [`_mm_mask_getexp_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh)
- * [ ] [`_mm_mask_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
- * [ ] [`_mm_mask_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
- * [ ] [`_mm_mask_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
- * [ ] [`_mm_mask_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
- * [ ] [`_mm_mask_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
- * [ ] [`_mm_mask_roundscale_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh)
- * [ ] [`_mm_mask_roundscale_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh)
- * [ ] [`_mm_mask_scalef_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh)
- * [ ] [`_mm_mask_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
* [ ] [`_mm_maskz_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
* [ ] [`_mm_maskz_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
* [ ] [`_mm_maskz_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
@@ -299,22 +255,6 @@
* [ ] [`_mm_maskz_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
* [ ] [`_mm_maskz_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
* [ ] [`_mm_maskz_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
- * [ ] [`_mm_maskz_getexp_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh)
- * [ ] [`_mm_maskz_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
- * [ ] [`_mm_maskz_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
- * [ ] [`_mm_maskz_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
- * [ ] [`_mm_maskz_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
- * [ ] [`_mm_maskz_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
- * [ ] [`_mm_maskz_roundscale_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh)
- * [ ] [`_mm_maskz_roundscale_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh)
- * [ ] [`_mm_maskz_scalef_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh)
- * [ ] [`_mm_maskz_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
- * [ ] [`_mm_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
- * [ ] [`_mm_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
- * [ ] [`_mm_roundscale_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh)
- * [ ] [`_mm_roundscale_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh)
- * [ ] [`_mm_scalef_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
- * [ ] [`_mm_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
* [ ] [`_mm_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pch)
@@ -345,8 +285,6 @@
* [ ] [`_mm256_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
* [ ] [`_mm256_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
* [ ] [`_mm256_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
- * [ ] [`_mm256_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph)
- * [ ] [`_mm256_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph)
* [ ] [`_mm256_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
* [ ] [`_mm256_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
* [ ] [`_mm256_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
@@ -372,11 +310,6 @@
* [ ] [`_mm256_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
* [ ] [`_mm256_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
* [ ] [`_mm256_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
- * [ ] [`_mm256_mask_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph)
- * [ ] [`_mm256_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
- * [ ] [`_mm256_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
- * [ ] [`_mm256_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
- * [ ] [`_mm256_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
* [ ] [`_mm256_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
* [ ] [`_mm256_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
* [ ] [`_mm256_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
@@ -399,20 +332,12 @@
* [ ] [`_mm256_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
* [ ] [`_mm256_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
* [ ] [`_mm256_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
- * [ ] [`_mm256_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
- * [ ] [`_mm256_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
- * [ ] [`_mm256_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
- * [ ] [`_mm256_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
- * [ ] [`_mm256_maskz_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph)
* [ ] [`_mm256_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
* [ ] [`_mm256_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
* [ ] [`_mm256_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph)
* [ ] [`_mm256_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph)
* [ ] [`_mm256_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph)
* [ ] [`_mm256_reduce_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph)
- * [ ] [`_mm256_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph)
- * [ ] [`_mm256_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph)
- * [ ] [`_mm256_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
* [ ] [`_mm_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
* [ ] [`_mm_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
* [ ] [`_mm_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
@@ -437,8 +362,6 @@
* [ ] [`_mm_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
* [ ] [`_mm_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
* [ ] [`_mm_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
- * [ ] [`_mm_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph)
- * [ ] [`_mm_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph)
* [ ] [`_mm_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
* [ ] [`_mm_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
* [ ] [`_mm_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
@@ -464,11 +387,6 @@
* [ ] [`_mm_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
* [ ] [`_mm_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
* [ ] [`_mm_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
- * [ ] [`_mm_mask_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph)
- * [ ] [`_mm_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph)
- * [ ] [`_mm_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
- * [ ] [`_mm_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
- * [ ] [`_mm_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
* [ ] [`_mm_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
* [ ] [`_mm_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
* [ ] [`_mm_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
@@ -491,20 +409,12 @@
* [ ] [`_mm_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
* [ ] [`_mm_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
* [ ] [`_mm_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
- * [ ] [`_mm_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
- * [ ] [`_mm_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph)
- * [ ] [`_mm_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
- * [ ] [`_mm_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
- * [ ] [`_mm_maskz_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph)
* [ ] [`_mm_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
* [ ] [`_mm_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
* [ ] [`_mm_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph)
* [ ] [`_mm_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph)
* [ ] [`_mm_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph)
* [ ] [`_mm_reduce_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph)
- * [ ] [`_mm_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph)
- * [ ] [`_mm_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph)
- * [ ] [`_mm_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph)
diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs
index b30bc63ed4..3c04d9ae90 100644
--- a/crates/core_arch/src/x86/avx512fp16.rs
+++ b/crates/core_arch/src/x86/avx512fp16.rs
@@ -624,12 +624,13 @@ pub unsafe fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_cmp_round_sh_mask<const IMM8: i32, const SAE: i32>(
+pub unsafe fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(
a: __m128h,
b: __m128h,
) -> __mmask8 {
+ static_assert_uimm_bits!(IMM5, 5);
static_assert_sae!(SAE);
- _mm_mask_cmp_round_sh_mask::<IMM8, SAE>(0xff, a, b)
+ _mm_mask_cmp_round_sh_mask::<IMM5, SAE>(0xff, a, b)
}
/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
@@ -641,13 +642,14 @@ pub unsafe fn _mm_cmp_round_sh_mask(
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_mask_cmp_round_sh_mask<const IMM8: i32, const SAE: i32>(
+pub unsafe fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(
k1: __mmask8,
a: __m128h,
b: __m128h,
) -> __mmask8 {
+ static_assert_uimm_bits!(IMM5, 5);
static_assert_sae!(SAE);
- vcmpsh(a, b, IMM8, k1, SAE)
+ vcmpsh(a, b, IMM5, k1, SAE)
}
/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
@@ -658,8 +660,9 @@ pub unsafe fn _mm_mask_cmp_round_sh_mask(
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_cmp_sh_mask<const IMM8: i32>(a: __m128h, b: __m128h) -> __mmask8 {
- _mm_cmp_round_sh_mask::<IMM8, _MM_FROUND_CUR_DIRECTION>(a, b)
+pub unsafe fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
+ static_assert_uimm_bits!(IMM5, 5);
+ _mm_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}
/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
@@ -670,12 +673,13 @@ pub unsafe fn _mm_cmp_sh_mask(a: __m128h, b: __m128h) -> __mmas
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_mask_cmp_sh_mask<const IMM8: i32>(
+pub unsafe fn _mm_mask_cmp_sh_mask<const IMM5: i32>(
k1: __mmask8,
a: __m128h,
b: __m128h,
) -> __mmask8 {
- _mm_mask_cmp_round_sh_mask::<IMM8, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
+ static_assert_uimm_bits!(IMM5, 5);
+ _mm_mask_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
}
/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed.
@@ -706,9 +710,10 @@ pub unsafe fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_comi_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
+pub unsafe fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
+ static_assert_uimm_bits!(IMM5, 5);
static_assert_sae!(SAE);
- vcomish(a, b, IMM8, SAE)
+ vcomish(a, b, IMM5, SAE)
}
/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
@@ -719,8 +724,9 @@ pub unsafe fn _mm_comi_round_sh(a: __m128h, b:
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_comi_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> i32 {
- _mm_comi_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(a, b)
+pub unsafe fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 {
+ static_assert_uimm_bits!(IMM5, 5);
+ _mm_comi_round_sh::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return
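For orientation (illustrative sketch, not part of the patch): the comparison predicate is now the 5-bit const generic `IMM5`, enforced by `static_assert_uimm_bits!(IMM5, 5)`. Assuming the nightly `f16` and `stdarch_x86_avx512_f16` features and an AVX512-FP16 CPU, a caller would pass one of the existing `_CMP_*` predicates; the helper name below is hypothetical.

    #![feature(f16, stdarch_x86_avx512_f16)]
    use std::arch::x86_64::*;

    /// Returns true when the lower f16 lanes of `a` and `b` compare equal (ordered, quiet).
    #[target_feature(enable = "avx512fp16")]
    unsafe fn lower_lanes_equal(a: __m128h, b: __m128h) -> bool {
        // _CMP_EQ_OQ (0x00) fits in the 5-bit immediate checked by the intrinsic.
        let mask: __mmask8 = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
        (mask & 1) != 0
    }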
@@ -8440,6033 +8446,9312 @@ pub unsafe fn _mm_maskz_min_round_sh(
_mm_mask_min_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
}
-#[allow(improper_ctypes)]
-extern "C" {
- #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
- fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
- #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
- fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
+/// This intrinsic essentially calculates `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getexp_ph(a: __m128h) -> __m128h {
+ _mm_mask_getexp_ph(_mm_undefined_ph(), 0xff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
- fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
- fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
- fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
- fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
+/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
+ vgetexpph_128(a, src, k)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
- fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
- fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
- fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
- fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
+/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h {
+ _mm_mask_getexp_ph(_mm_setzero_ph(), k, a)
+}
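A minimal sketch of the getexp semantics (illustrative, not part of the patch; assumes the nightly `f16`/`stdarch_x86_avx512_f16` features, AVX512-FP16 + AVX512-VL hardware, and a hypothetical helper name):

    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512fp16,avx512vl")]
    unsafe fn getexp_demo() -> __m128h {
        // 8.0 = 1.0 * 2^3 and 0.75 = 1.5 * 2^-1, so getexp yields 3.0 and -1.0 for those lanes.
        let a = _mm_setr_ph(1.0, 2.0, 4.0, 8.0, 0.5, 0.75, 3.0, 6.0);
        // Expected result: (0.0, 1.0, 2.0, 3.0, -1.0, -1.0, 1.0, 2.0)
        _mm_getexp_ph(a)
    }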
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
- fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
- fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
- fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
- fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
+/// This intrinsic essentially calculates `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_getexp_ph(a: __m256h) -> __m256h {
+ _mm256_mask_getexp_ph(_mm256_undefined_ph(), 0xffff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
- fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
- fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
- fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
- fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
+/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
+ vgetexpph_256(a, src, k)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
- fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
- fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
- fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
- fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
- fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
- fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
- fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
- fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
+/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h {
+ _mm256_mask_getexp_ph(_mm256_setzero_ph(), k, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
- fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
- fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
- fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
- fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
- fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
- -> __m512;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
- fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
- -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
- fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
- fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
+/// This intrinsic essentially calculates `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_getexp_ph(a: __m512h) -> __m512h {
+ _mm512_mask_getexp_ph(_mm512_undefined_ph(), 0xffffffff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
- fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.fma.f16"]
- fn fmaf16(a: f16, b: f16, c: f16) -> f16; // TODO: use `crate::intrinsics::fmaf16` when it's available
- #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
- fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
+/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
+ _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a)
+}
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
- fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
- fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
- fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
+/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h {
+ _mm512_mask_getexp_ph(_mm512_setzero_ph(), k, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
- fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
- fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
- fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
- fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
+/// This intrinsic essentially calculates `floor(log2(x))` for each element. Exceptions can be suppressed
+/// by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h {
+ static_assert_sae!(SAE);
+ _mm512_mask_getexp_round_ph::<SAE>(_mm512_undefined_ph(), 0xffffffff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
- fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
- fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
- fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
- fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
+/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_getexp_round_ph<const SAE: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_sae!(SAE);
+ vgetexpph_512(a, src, k, SAE)
+}
- #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
- fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
- fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
+/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512h {
+ static_assert_sae!(SAE);
+ _mm512_mask_getexp_round_ph::<SAE>(_mm512_setzero_ph(), k, a)
+}
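The `_round` variants only add the `SAE` const parameter; a hedged usage sketch (same feature assumptions and hypothetical helper name as above), suppressing exception reporting:

    #[target_feature(enable = "avx512fp16")]
    unsafe fn getexp_no_exceptions(a: __m512h) -> __m512h {
        // static_assert_sae! accepts only _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC here.
        _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a)
    }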
- #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
- fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
- fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
- fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
- fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
+/// calculates `floor(log2(x))` for the lower element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_getexp_sh(_mm_undefined_ph(), 0xff, a, b)
+}
- #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
- fn vminph_128(a: __m128h, b: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
- fn vminph_256(a: __m256h, b: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
- fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
- fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
+/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
+/// for the lower element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
+/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
+/// lower element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_getexp_sh(_mm_setzero_ph(), k, a, b)
}
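To illustrate the scalar `_sh` forms above (sketch only, same assumptions, hypothetical helper name): only lane 0 is computed from `b`; lanes 1..=7 are copied from `a`.

    #[target_feature(enable = "avx512fp16")]
    unsafe fn getexp_lower(a: __m128h, b: __m128h) -> __m128h {
        // dst[0] = floor(log2(|b[0]|)); dst[1..=7] = a[1..=7]
        _mm_getexp_sh(a, b)
    }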
-#[cfg(test)]
-mod tests {
- use crate::core_arch::x86::*;
- use crate::mem::transmute;
- use crate::ptr::{addr_of, addr_of_mut};
- use stdarch_test::simd_test;
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
+/// calculates `floor(log2(x))` for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
+/// in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
+ static_assert_sae!(SAE);
+ _mm_mask_getexp_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
+}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
- _mm_setr_ph(re, im, re, im, re, im, re, im)
- }
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
+/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
+/// for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getexp_round_sh<const SAE: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_sae!(SAE);
+ vgetexpsh(a, b, src, k, SAE)
+}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
- _mm256_setr_ph(
- re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
- )
- }
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
+/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
+/// lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getexp_round_sh<const SAE: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_sae!(SAE);
+ _mm_mask_getexp_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
+}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
- _mm512_setr_ph(
- re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
- re, im, re, im, re, im, re, im, re, im,
- )
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ a: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ _mm_mask_getmant_ph::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a)
+}
+
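A worked example of the normalization (illustrative, not part of the patch): 12.0 = 1.5 * 2^3, so with the [1, 2) interval the normalized mantissa is 1.5, while the [0.5, 1) interval would yield 0.75. The const arguments are spelled as raw values in this sketch to stay independent of the enum constant spellings; the helper name is hypothetical.

    #[target_feature(enable = "avx512fp16,avx512vl")]
    unsafe fn getmant_demo(a: __m128h) -> __m128h {
        // NORM = 0 selects the [1, 2) interval, SIGN = 0 keeps the source sign
        // (the _MM_MANT_NORM_1_2 / _MM_MANT_SIGN_src values listed above).
        _mm_getmant_ph::<0, 0>(a)
    }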
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ vgetmantph_128(a, (SIGN << 2) | NORM, src, k)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ k: __mmask8,
+ a: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ _mm_mask_getmant_ph::<NORM, SIGN>(_mm_setzero_ph(), k, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ a: __m256h,
+) -> __m256h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_undefined_ph(), 0xffff, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ src: __m256h,
+ k: __mmask16,
+ a: __m256h,
+) -> __m256h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ vgetmantph_256(a, (SIGN << 2) | NORM, src, k)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ k: __mmask16,
+ a: __m256h,
+) -> __m256h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_setzero_ph(), k, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ _mm512_mask_getmant_round_ph::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_setzero_ph(), k, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
+/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(1, 2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_getmant_round_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ static_assert_sae!(SAE);
+ _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
+/// in the sae parameter
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4, 5)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_getmant_round_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ static_assert_sae!(SAE);
+ vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
+/// in the sae parameter
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_getmant_round_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ static_assert_sae!(SAE);
+ _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_setzero_ph(), k, a)
+}
+
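The 512-bit `_round` getmant variants take all three const parameters; a hedged sketch (same assumptions, raw enum values and a hypothetical helper name, as in the earlier example):

    #[target_feature(enable = "avx512fp16")]
    unsafe fn getmant_quiet(a: __m512h) -> __m512h {
        // NORM = 1 -> [0.5, 2) interval, SIGN = 1 -> force sign to 0,
        // SAE = _MM_FROUND_NO_EXC -> suppress exception reporting.
        _mm512_getmant_round_ph::<1, 1, _MM_FROUND_NO_EXC>(a)
    }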
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
+/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getmant_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ _mm_mask_getmant_sh::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
+/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
+/// the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(4, 5)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getmant_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ _mm_mask_getmant_round_sh::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
+/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
+/// the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getmant_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ _mm_mask_getmant_sh::<NORM, SIGN>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
+/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
+/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getmant_round_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ static_assert_sae!(SAE);
+    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
+/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
+/// the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(4, 5, 6)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getmant_round_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ static_assert_sae!(SAE);
+ vgetmantsh(a, b, (SIGN << 2) | NORM, src, k, SAE)
+}
+
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
+/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
+/// the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4, 5)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getmant_round_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(NORM, 4);
+ static_assert_uimm_bits!(SIGN, 2);
+ static_assert_sae!(SAE);
+    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(_mm_setzero_ph(), k, a, b)
+}
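+
+// Usage sketch (illustrative comment, not part of the API being added): the NORM and SIGN
+// immediates of the getmant family are packed into a single imm8 as `(SIGN << 2) | NORM`
+// before reaching `vgetmantsh`. A call that normalizes the low f16 of `b` into [0.5, 1),
+// keeps the source sign and suppresses exceptions would look like this, reusing the
+// `_MM_MANT_*` constants already defined for AVX-512F:
+//
+//     #[target_feature(enable = "avx512fp16")]
+//     unsafe fn getmant_demo(a: __m128h, b: __m128h) -> __m128h {
+//         _mm_getmant_round_sh::<_MM_MANT_NORM_P5_1, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(a, b)
+//     }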
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_roundscale_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_roundscale_ph<const IMM8: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+ vrndscaleph_128(a, IMM8, src, k)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_roundscale_ph::<IMM8>(_mm_setzero_ph(), k, a)
+}
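+
+// Usage sketch (illustrative comment): per Intel's VRNDSCALE definition, bits 7:4 of IMM8 give
+// the number of fraction bits to keep (i.e. rounding to multiples of 2^-M) and the low bits pick
+// the rounding mode listed above, so truncating every element to an integer value is simply:
+//
+//     #[target_feature(enable = "avx512fp16,avx512vl")]
+//     unsafe fn truncate_demo(a: __m128h) -> __m128h {
+//         _mm_roundscale_ph::<_MM_FROUND_TO_ZERO>(a)
+//     }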
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm256_mask_roundscale_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_roundscale_ph<const IMM8: i32>(
+ src: __m256h,
+ k: __mmask16,
+ a: __m256h,
+) -> __m256h {
+ static_assert_uimm_bits!(IMM8, 8);
+ vrndscaleph_256(a, IMM8, src, k)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm256_mask_roundscale_ph::<IMM8>(_mm256_setzero_ph(), k, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm512_mask_roundscale_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_roundscale_ph<const IMM8: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(IMM8, 8);
+    _mm512_mask_roundscale_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm512_mask_roundscale_ph::<IMM8>(_mm512_setzero_ph(), k, a)
+}
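+
+// Usage sketch (illustrative comment): the mask variants follow the usual AVX-512 convention,
+// `_mask_` merging unselected lanes from `src` and `_maskz_` zeroing them. For example, rounding
+// only the even-indexed lanes of a 512-bit vector while keeping the rest from `src`:
+//
+//     #[target_feature(enable = "avx512fp16")]
+//     unsafe fn round_even_lanes_demo(src: __m512h, a: __m512h) -> __m512h {
+//         _mm512_mask_roundscale_ph::<_MM_FROUND_TO_NEAREST_INT>(src, 0x55555555, a)
+//     }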
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
+/// in the sae parameter
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
+    static_assert_uimm_bits!(IMM8, 8);
+    static_assert_sae!(SAE);
+    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
+/// in the sae parameter
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(IMM8, 8);
+ static_assert_sae!(SAE);
+ vrndscaleph_512(a, IMM8, src, k, SAE)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(IMM8, 8);
+ static_assert_sae!(SAE);
+    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
+/// from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_roundscale_sh::<IMM8>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
+/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_roundscale_sh<const IMM8: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_roundscale_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
+/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_roundscale_sh<const IMM8: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_roundscale_sh::<IMM8>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
+/// from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+ static_assert_sae!(SAE);
+    _mm_mask_roundscale_round_sh::<IMM8, SAE>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
+/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(4, 5)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+ static_assert_sae!(SAE);
+ vrndscalesh(a, b, src, k, IMM8, SAE)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
+/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+ static_assert_sae!(SAE);
+    _mm_mask_roundscale_round_sh::<IMM8, SAE>(_mm_setzero_ph(), k, a, b)
+}
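+
+// Usage sketch (illustrative comment): rounding only the low element of `b` to the nearest
+// integer while copying the upper elements from `a`, with floating-point exceptions suppressed
+// via the SAE parameter:
+//
+//     #[target_feature(enable = "avx512fp16")]
+//     unsafe fn round_low_demo(a: __m128h, b: __m128h) -> __m128h {
+//         _mm_roundscale_round_sh::<_MM_FROUND_TO_NEAREST_INT, _MM_FROUND_NO_EXC>(a, b)
+//     }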
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_scalef_ph(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ vscalefph_128(a, b, src, k)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_scalef_ph(_mm_setzero_ph(), k, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h {
+ _mm256_mask_scalef_ph(_mm256_undefined_ph(), 0xffff, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ vscalefph_256(a, b, src, k)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ _mm256_mask_scalef_ph(_mm256_setzero_ph(), k, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_scalef_ph(_mm512_undefined_ph(), 0xffffffff, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_scalef_ph(_mm512_setzero_ph(), k, a, b)
+}
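+
+// Usage sketch (illustrative comment): per Intel's VSCALEF definition each result element is
+// `a[i] * 2^floor(b[i])`, so a constant exponent vector scales `a` by a power of two. The
+// `_mm_set1_ph` constructor used below is assumed from the set/load part of this patch series:
+//
+//     #[target_feature(enable = "avx512fp16,avx512vl")]
+//     unsafe fn times_four_demo(a: __m128h) -> __m128h {
+//         _mm_scalef_ph(a, _mm_set1_ph(2.0))
+//     }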
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_undefined_ph(), 0xffffffff, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vscalefph_512(a, b, src, k, ROUNDING)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
+/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_scalef_sh(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_scalef_sh(_mm_setzero_ph(), k, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
+/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_scalef_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_scalef_round_sh<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vscalefsh(a, b, src, k, ROUNDING)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+    _mm_mask_scalef_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
+}
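+
+// Usage sketch (illustrative comment): the `_round` variants take the rounding mode as a const
+// argument, typically OR-ed with `_MM_FROUND_NO_EXC` as in the list above:
+//
+//     #[target_feature(enable = "avx512fp16")]
+//     unsafe fn scalef_rne_demo(a: __m128h, b: __m128h) -> __m128h {
+//         _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
+//     }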
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_reduce_ph<const IMM8: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+ vreduceph_128(a, IMM8, src, k)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_ph::<IMM8>(_mm_setzero_ph(), k, a)
+}
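+
+// Usage sketch (illustrative comment): per Intel's VREDUCE definition the result is
+// `a - round(a * 2^M) * 2^-M` with M taken from IMM8 bits 7:4, so with M = 0 and truncation
+// the intrinsic yields the fractional part of each element:
+//
+//     #[target_feature(enable = "avx512fp16,avx512vl")]
+//     unsafe fn fract_demo(a: __m128h) -> __m128h {
+//         _mm_reduce_ph::<_MM_FROUND_TO_ZERO>(a)
+//     }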
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm256_mask_reduce_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_reduce_ph<const IMM8: i32>(
+ src: __m256h,
+ k: __mmask16,
+ a: __m256h,
+) -> __m256h {
+ static_assert_uimm_bits!(IMM8, 8);
+ vreduceph_256(a, IMM8, src, k)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm256_mask_reduce_ph::<IMM8>(_mm256_setzero_ph(), k, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm512_mask_reduce_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_reduce_ph<const IMM8: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(IMM8, 8);
+    _mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm512_mask_reduce_ph::<IMM8>(_mm512_setzero_ph(), k, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
+    static_assert_uimm_bits!(IMM8, 8);
+    static_assert_sae!(SAE);
+    _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(IMM8, 8);
+ static_assert_sae!(SAE);
+ vreduceph_512(a, IMM8, src, k, SAE)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ static_assert_uimm_bits!(IMM8, 8);
+ static_assert_sae!(SAE);
+    _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the
+/// upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
+    static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_sh::<IMM8>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
+/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from
+/// a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_reduce_sh<const IMM8: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
+/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
+/// to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_reduce_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+    _mm_mask_reduce_sh::<IMM8>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the upper
+/// 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
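+///
+/// Call sketch (illustrative only): both the reduction immediate and the SAE
+/// constant are supplied as const generics, e.g.
+///
+///     let r = _mm_reduce_round_sh::<0, _MM_FROUND_NO_EXC>(a, b);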
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+ static_assert_sae!(SAE);
+    _mm_mask_reduce_round_sh::<IMM8, SAE>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
+/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a
+/// to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(4, 5)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+ static_assert_sae!(SAE);
+ vreducesh(a, b, src, k, IMM8, SAE)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
+/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
+/// to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ static_assert_uimm_bits!(IMM8, 8);
+ static_assert_sae!(SAE);
+    _mm_mask_reduce_round_sh::<IMM8, SAE>(_mm_setzero_ph(), k, a, b)
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
+ fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
+ #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
+ fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
+
+ #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
+ fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
+ fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
+ fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
+ fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
+ fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
+ fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
+ fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
+ fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
+ fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
+ fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
+ fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
+ fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
+ fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
+ fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
+ fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
+ fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
+ fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
+ fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
+ fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
+ fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
+ fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
+ fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
+ fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
+ fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
+ fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
+ fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
+ fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
+ fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
+ fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
+ -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
+ fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
+ -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
+ fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
+ fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
+ fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.fma.f16"]
+ fn fmaf16(a: f16, b: f16, c: f16) -> f16; // TODO: use `crate::intrinsics::fmaf16` when it's available
+ #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
+ fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
+
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
+ fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
+ fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
+ fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
+ fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
+ fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
+ fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
+ fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
+ fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
+ fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
+ fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
+ fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
+ fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
+ fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
+ fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
+ fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
+ fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
+ fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
+ fn vminph_128(a: __m128h, b: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
+ fn vminph_256(a: __m256h, b: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
+ fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
+ fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128"]
+ fn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256"]
+ fn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512"]
+ fn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh"]
+ fn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128"]
+ fn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256"]
+ fn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512"]
+ fn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh"]
+ fn vgetmantsh(
+ a: __m128h,
+ b: __m128h,
+ imm8: i32,
+ src: __m128h,
+ k: __mmask8,
+ sae: i32,
+ ) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128"]
+ fn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256"]
+ fn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512"]
+ fn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh"]
+ fn vrndscalesh(
+ a: __m128h,
+ b: __m128h,
+ src: __m128h,
+ k: __mmask8,
+ imm8: i32,
+ sae: i32,
+ ) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128"]
+ fn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256"]
+ fn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512"]
+ fn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh"]
+ fn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128"]
+ fn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256"]
+ fn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512"]
+ fn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh"]
+ fn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
+ -> __m128h;
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::x86::*;
+ use crate::mem::transmute;
+ use crate::ptr::{addr_of, addr_of_mut};
+ use stdarch_test::simd_test;
+
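+    // Test-only helpers: broadcast one complex value, stored as an interleaved
+    // (re, im) pair of f16 lanes, across an entire `_ph` vector.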
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
+ _mm_setr_ph(re, im, re, im, re, im, re, im)
+ }
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
+ _mm256_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ )
+ }
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
+ _mm512_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ re, im, re, im, re, im, re, im, re, im,
+ )
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_ph() {
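+        // `_mm_set_ph` takes its arguments from the highest lane down to the lowest,
+        // while `_mm_setr_ph` takes them in lowest-first (memory) order, so the two
+        // reversed argument lists below describe the same vector.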
+ let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set_ph() {
+ let r = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_setr_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set_ph() {
+ let r = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_setr_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_sh() {
+ let r = _mm_set_sh(1.0);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set1_ph() {
+ let r = _mm_set1_ph(1.0);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set1_ph() {
+ let r = _mm256_set1_ph(1.0);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set1_ph() {
+ let r = _mm512_set1_ph(1.0);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_setr_ph() {
+ let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_setr_ph() {
+ let r = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setr_ph() {
+ let r = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_setzero_ph() {
+ let r = _mm_setzero_ph();
+ let e = _mm_set1_ph(0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_setzero_ph() {
+ let r = _mm256_setzero_ph();
+ let e = _mm256_set1_ph(0.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setzero_ph() {
+ let r = _mm512_setzero_ph();
+ let e = _mm512_set1_ph(0.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castsi128_ph() {
+ let a = _mm_set1_epi16(0x3c00);
+ let r = _mm_castsi128_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castsi256_ph() {
+ let a = _mm256_set1_epi16(0x3c00);
+ let r = _mm256_castsi256_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castsi512_ph() {
+ let a = _mm512_set1_epi16(0x3c00);
+ let r = _mm512_castsi512_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_si128() {
+ let a = _mm_set1_ph(1.0);
+ let r = _mm_castph_si128(a);
+ let e = _mm_set1_epi16(0x3c00);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_si256() {
+ let a = _mm256_set1_ph(1.0);
+ let r = _mm256_castph_si256(a);
+ let e = _mm256_set1_epi16(0x3c00);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_si512() {
+ let a = _mm512_set1_ph(1.0);
+ let r = _mm512_castph_si512(a);
+ let e = _mm512_set1_epi16(0x3c00);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castps_ph() {
+ let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
+ let r = _mm_castps_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castps_ph() {
+ let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castps_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castps_ph() {
+ let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castps_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_ps() {
+ let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
+ let r = _mm_castph_ps(a);
+ let e = _mm_set1_ps(1.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_ps() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
+ let r = _mm256_castph_ps(a);
+ let e = _mm256_set1_ps(1.0);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_ps() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
+ let r = _mm512_castph_ps(a);
+ let e = _mm512_set1_ps(1.0);
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castpd_ph() {
+ let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
+ let r = _mm_castpd_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castpd_ph() {
+ let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castpd_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castpd_ph() {
+ let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castpd_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_pd() {
+ let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
+ let r = _mm_castph_pd(a);
+ let e = _mm_set1_pd(1.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_pd() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
+ let r = _mm256_castph_pd(a);
+ let e = _mm256_set1_pd(1.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_pd() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
+ let r = _mm512_castph_pd(a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph256_ph128() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm256_castph256_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph128() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph256() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_castph128_ph256(a);
+ assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_castph128_ph512(a);
+ assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_castph256_ph512(a);
+ assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_zextph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_zextph128_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_zextph128_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_zextph256_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cmp_round_sh_mask() {
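+        // Bit 0 of the returned mask is set when the predicate holds for the lowest
+        // elements; `_MM_FROUND_NO_EXC` only suppresses exception reporting and does
+        // not affect the comparison result.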
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cmp_round_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
+ assert_eq!(r, 0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
+ assert_eq!(r, 0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comige_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comile_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comilt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomige_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomile_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomilt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_load_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_load_ph(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_load_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_load_ph(addr_of!(a).cast());
+ assert_eq_m256h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_ph() {
- let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ unsafe fn test_mm512_load_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_load_ph(addr_of!(a).cast());
+ assert_eq_m512h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_load_sh(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let src = _mm_set_sh(2.);
+ let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
+ assert_eq_m128h(src, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_loadu_ph() {
+ let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
+ let r = _mm_loadu_ph(array.as_ptr());
+ let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ ];
+ let r = _mm256_loadu_ph(array.as_ptr());
+ let e = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ ];
+ let r = _mm512_loadu_ph(array.as_ptr());
+ let e = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_move_sh(a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let src = _mm_set_sh(10.0);
+ let r = _mm_mask_move_sh(src, 0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_maskz_move_sh(0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_store_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_store_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let mut b = _mm256_setzero_ph();
+ _mm256_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m256h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_store_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let mut b = _mm512_setzero_ph();
+ _mm512_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m512h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_sh(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_storeu_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut array = [0.0; 8];
+ _mm_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_storeu_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let mut array = [0.0; 16];
+ _mm256_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_storeu_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let mut array = [0.0; 32];
+ _mm512_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_add_ph(a, b);
+ let e = _mm_set1_ph(9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
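+        // Mask bit i controls lane i (the i-th lowest element): with 0b01010101 the
+        // even lanes take `a + b` (= 9.0) and the odd lanes keep the value from `src`.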
+ let r = _mm_mask_add_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_add_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set_ph() {
- let r = _mm256_set_ph(
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_add_ph(a, b);
+ let e = _mm256_set1_ph(17.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_add_ph() {
+ let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let e = _mm256_setr_ph(
+ let b = _mm256_set_ph(
16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
);
+ let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set_ph() {
- let r = _mm512_set_ph(
+ unsafe fn test_mm512_add_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
- let e = _mm512_setr_ph(
+ let b = _mm512_set_ph(
32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
3.0, 2.0, 1.0,
);
+ let r = _mm512_add_ph(a, b);
+ let e = _mm512_set1_ph(33.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_sh() {
- let r = _mm_set_sh(1.0);
- let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set1_ph() {
- let r = _mm_set1_ph(1.0);
- let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set1_ph() {
- let r = _mm256_set1_ph(1.0);
- let e = _mm256_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ unsafe fn test_mm512_mask_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set1_ph() {
- let r = _mm512_set1_ph(1.0);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_setr_ph() {
- let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_maskz_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_setr_ph() {
- let r = _mm256_setr_ph(
+ unsafe fn test_mm512_add_round_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- let e = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
);
- assert_eq_m256h(r, e);
+ let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_setr_ph() {
- let r = _mm512_setr_ph(
+ unsafe fn test_mm512_mask_add_round_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
- let e = _mm512_set_ph(
+ let b = _mm512_set_ph(
32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
3.0, 2.0, 1.0,
);
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_setzero_ph() {
- let r = _mm_setzero_ph();
- let e = _mm_set1_ph(0.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_setzero_ph() {
- let r = _mm256_setzero_ph();
- let e = _mm256_set1_ph(0.0);
- assert_eq_m256h(r, e);
- }
-
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_setzero_ph() {
- let r = _mm512_setzero_ph();
- let e = _mm512_set1_ph(0.0);
+ unsafe fn test_mm512_maskz_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castsi128_ph() {
- let a = _mm_set1_epi16(0x3c00);
- let r = _mm_castsi128_ph(a);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castsi256_ph() {
- let a = _mm256_set1_epi16(0x3c00);
- let r = _mm256_castsi256_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castsi512_ph() {
- let a = _mm512_set1_epi16(0x3c00);
- let r = _mm512_castsi512_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_si128() {
- let a = _mm_set1_ph(1.0);
- let r = _mm_castph_si128(a);
- let e = _mm_set1_epi16(0x3c00);
- assert_eq_m128i(r, e);
+ unsafe fn test_mm_mask_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_si256() {
- let a = _mm256_set1_ph(1.0);
- let r = _mm256_castph_si256(a);
- let e = _mm256_set1_epi16(0x3c00);
- assert_eq_m256i(r, e);
+ unsafe fn test_mm_maskz_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_si512() {
- let a = _mm512_set1_ph(1.0);
- let r = _mm512_castph_si512(a);
- let e = _mm512_set1_epi16(0x3c00);
- assert_eq_m512i(r, e);
+ unsafe fn test_mm_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_sh(a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castps_ph() {
- let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
- let r = _mm_castps_ph(a);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm_mask_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_sh(src, 1, a, b);
+ let e = _mm_set_sh(3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castps_ph() {
- let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castps_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_add_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_add_sh(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castps_ph() {
- let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castps_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_sub_ph(a, b);
+ let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_ps() {
- let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
- let r = _mm_castph_ps(a);
- let e = _mm_set1_ps(1.0);
- assert_eq_m128(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_ps() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
- let r = _mm256_castph_ps(a);
- let e = _mm256_set1_ps(1.0);
- assert_eq_m256(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_sub_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_ps() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
- let r = _mm512_castph_ps(a);
- let e = _mm512_set1_ps(1.0);
- assert_eq_m512(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_sub_ph(a, b);
+ let e = _mm256_set_ph(
+ -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
+ 15.0,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castpd_ph() {
- let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
- let r = _mm_castpd_ph(a);
- let e = _mm_set1_ph(1.0);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castpd_ph() {
- let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castpd_ph(a);
- let e = _mm256_set1_ph(1.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castpd_ph() {
- let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castpd_ph(a);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_ph(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_pd() {
- let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
- let r = _mm_castph_pd(a);
- let e = _mm_set1_pd(1.0);
- assert_eq_m128d(r, e);
+ unsafe fn test_mm512_mask_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_pd() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
- let r = _mm256_castph_pd(a);
- let e = _mm256_set1_pd(1.0);
- assert_eq_m256d(r, e);
+ unsafe fn test_mm512_maskz_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_pd() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
- let r = _mm512_castph_pd(a);
- let e = _mm512_set1_pd(1.0);
- assert_eq_m512d(r, e);
+ unsafe fn test_mm512_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph256_ph128() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ unsafe fn test_mm512_mask_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- let r = _mm256_castph256_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- assert_eq_m128h(r, e);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph128() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ unsafe fn test_mm512_maskz_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- let r = _mm512_castph512_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(-1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph256() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ unsafe fn test_mm_mask_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
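+        // Only element 0 participates: with mask bit 0 clear the result takes src, with
+        // it set it takes a - b. _mm_set_sh zeroes elements 1..=7, so the upper lanes
+        // are zero in both cases.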
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let r = _mm512_castph512_ph256(a);
- let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- assert_eq_m256h(r, e);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_castph128_ph256(a);
- assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ unsafe fn test_mm_maskz_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_castph128_ph512(a);
- assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ unsafe fn test_mm_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_sub_sh(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- let r = _mm512_castph256_ph512(a);
- assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ unsafe fn test_mm_mask_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_sh(src, 1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_zextph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_zextph128_ph256(a);
- let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_sub_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_sub_sh(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_zextph128_ph512(a);
- let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- );
- assert_eq_m512h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_mul_ph(a, b);
+ let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- let r = _mm512_zextph256_ph512(a);
- let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- );
- assert_eq_m512h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
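+        // The 0b01010101 mask writes products into the even-numbered elements and keeps
+        // src in the odd ones; _mm_set_ph lists its arguments from element 7 down to element 0.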
+ let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmp_round_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_mul_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmp_round_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
- assert_eq!(r, 0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_mul_ph(a, b);
+ let e = _mm256_set_ph(
+ 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
+ 30.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmp_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmp_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
- assert_eq!(r, 0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
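+        // Element i of the product is (32 - i) * (i + 1), which peaks at 272 in the
+        // middle and falls off symmetrically toward 32 at both ends.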
+ let r = _mm512_mul_ph(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mask_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comieq_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comieq_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comige_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comigt_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mask_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comile_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_comile_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comilt_sh() {
+ unsafe fn test_mm_mul_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comilt_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comineq_sh() {
+ unsafe fn test_mm_mask_mul_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comineq_sh(a, b);
- assert_eq!(r, 1);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomieq_sh() {
+ unsafe fn test_mm_maskz_mul_round_sh() {
let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomieq_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomige_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomigt_sh(a, b);
- assert_eq!(r, 1);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomile_sh() {
+ unsafe fn test_mm_mul_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomile_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_mul_sh(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomilt_sh() {
+ unsafe fn test_mm_mask_mul_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomilt_sh(a, b);
- assert_eq!(r, 1);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_sh(src, 1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomineq_sh() {
+ unsafe fn test_mm_maskz_mul_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomineq_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_maskz_mul_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_mul_sh(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_load_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_load_ph(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ unsafe fn test_mm_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_div_ph(a, b);
+ let e = _mm_set1_ph(0.5);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_load_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_load_ph(addr_of!(a).cast());
- assert_eq_m256h(a, b);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_load_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_load_ph(addr_of!(a).cast());
- assert_eq_m512h(a, b);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_load_sh(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ unsafe fn test_mm_mask_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
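+        // With every element of a equal to 1.0 and of b equal to 2.0, the masked-in
+        // elements become 0.5 and the rest keep their src values.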
+ let r = _mm_mask_div_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_load_sh() {
- let a = _mm_set_sh(1.0);
- let src = _mm_set_sh(2.);
- let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
- assert_eq_m128h(src, b);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_maskz_div_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
- assert_eq_m128h(_mm_setzero_ph(), b);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_div_ph(a, b);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_loadu_ph() {
- let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
- let r = _mm_loadu_ph(array.as_ptr());
- let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm256_mask_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let src = _mm256_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0,
+ );
+ let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_loadu_ph() {
- let array = [
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- ];
- let r = _mm256_loadu_ph(array.as_ptr());
- let e = _mm256_setr_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_loadu_ph() {
- let array = [
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- ];
- let r = _mm512_loadu_ph(array.as_ptr());
- let e = _mm512_setr_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
+ unsafe fn test_mm512_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_ph(a, b);
+ let e = _mm512_set1_ph(0.5);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_move_sh(a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_mask_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let src = _mm512_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
+ );
+ let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let src = _mm_set_sh(10.0);
- let r = _mm_mask_move_sh(src, 0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_maskz_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_maskz_move_sh(0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_store_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut b = _mm_setzero_ph();
- _mm_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
+ unsafe fn test_mm512_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_store_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let src = _mm512_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
);
- let mut b = _mm256_setzero_ph();
- _mm256_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m256h(a, b);
+ let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_store_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ unsafe fn test_mm512_maskz_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
);
- let mut b = _mm512_setzero_ph();
- _mm512_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m512h(a, b);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_store_sh() {
+ unsafe fn test_mm_div_round_sh() {
let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_store_sh(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_store_sh() {
+ unsafe fn test_mm_mask_div_round_sh() {
let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
- assert_eq_m128h(_mm_setzero_ph(), b);
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
- assert_eq_m128h(a, b);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_storeu_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut array = [0.0; 8];
- _mm_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_div_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_storeu_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let mut array = [0.0; 16];
- _mm256_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_div_sh(a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_storeu_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let mut array = [0.0; 32];
- _mm512_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+ unsafe fn test_mm_mask_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_div_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_div_sh(src, 1, a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_add_ph(a, b);
- let e = _mm_set1_ph(9.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_div_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_div_sh(1, a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_add_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ unsafe fn test_mm_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
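+        // Each pch lane is one complex number stored as a (real, imaginary) f16 pair;
+        // with every lane set to 0 + 1i, the product (0 + 1i) * (0 + 1i) = -1 + 0i.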
+ let r = _mm_mul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_add_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
+ unsafe fn test_mm_mask_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
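+        // Each mask bit covers one complex pair (two f16 elements), so 0b0101 stores the
+        // product in pairs 0 and 2 and keeps the src values in pairs 1 and 3.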
+ let r = _mm_mask_mul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_add_ph(a, b);
- let e = _mm256_set1_ph(17.0);
+ unsafe fn test_mm_maskz_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_mul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_mul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ unsafe fn test_mm256_mask_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ unsafe fn test_mm256_maskz_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_mul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_add_ph(a, b);
- let e = _mm512_set1_ph(33.0);
+ unsafe fn test_mm512_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ unsafe fn test_mm512_maskz_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(33.0);
+ unsafe fn test_mm512_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(3.0);
+ unsafe fn test_mm_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
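+        // The scalar complex multiply only touches the first pair: (0 + 1i) * (0 + 1i)
+        // = -1 + 0i, while elements 2..=7 of the result are copied from a.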
+ let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(3.0);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_maskz_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(3.0);
+ _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_add_sh(a, b);
- let e = _mm_set_sh(3.0);
+ unsafe fn test_mm_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_mul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_add_sh(src, 1, a, b);
- let e = _mm_set_sh(3.0);
+ unsafe fn test_mm_mask_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_mul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_add_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_add_sh(1, a, b);
- let e = _mm_set_sh(3.0);
+ unsafe fn test_mm_maskz_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_mul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_sub_ph(a, b);
- let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ unsafe fn test_mm_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
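+        // fmul_pch computes the same complex product as mul_pch, so the expectations
+        // here mirror test_mm_mul_pch.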
+ let r = _mm_fmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ unsafe fn test_mm_mask_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_sub_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ unsafe fn test_mm_maskz_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_fmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_sub_ph(a, b);
- let e = _mm256_set_ph(
- -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
- 15.0,
- );
+ unsafe fn test_mm256_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_fmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ unsafe fn test_mm256_mask_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ unsafe fn test_mm256_maskz_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_sub_ph(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
- );
+ unsafe fn test_mm512_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ unsafe fn test_mm512_maskz_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
- );
+ unsafe fn test_mm512_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(-1.0);
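+    // Scalar-complex (`sch`) tests: only the lowest complex pair (lanes 0-1) is
+    // computed; lanes 2-7 of the result are copied from `a`, which is why the
+    // expected vectors keep a's upper elements 2.0..=7.0 unchanged.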
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(-1.0);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_maskz_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(-1.0);
+ _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_sh(a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_sub_sh(src, 1, a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_mask_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_sub_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_sub_sh(1, a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_maskz_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_fmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
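+    // `cmul`/`fcmul` multiply `a` by the complex conjugate of `b`: here
+    // (0 + 1i) * conj(0 - 1i) = (0 + 1i) * (0 + 1i) = -1 + 0i, matching the
+    // (-1.0, 0.0) pairs expected below.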
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_mul_ph(a, b);
- let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ unsafe fn test_mm_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_cmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ unsafe fn test_mm_mask_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_mul_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ unsafe fn test_mm_maskz_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_cmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_mul_ph(a, b);
- let e = _mm256_set_ph(
- 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
- 30.0, 16.0,
- );
+ unsafe fn test_mm256_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_cmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ unsafe fn test_mm256_mask_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ unsafe fn test_mm256_maskz_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_mul_ph(a, b);
- let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
- );
+ unsafe fn test_mm512_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ unsafe fn test_mm512_maskz_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
- );
+ unsafe fn test_mm512_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_mask_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_maskz_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_cmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_sh(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_mul_sh(src, 1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_mask_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_mul_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_mul_sh(1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_maskz_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
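+    // The `fcmul_*` tests below reuse the `cmul_*` operands and expected values;
+    // the two names are interchangeable spellings of the same conjugate multiply.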
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let r = _mm_div_ph(a, b);
- let e = _mm_set1_ph(0.5);
+ unsafe fn test_mm_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_fcmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
- let r = _mm_mask_div_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
+ unsafe fn test_mm_mask_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let r = _mm_maskz_div_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ unsafe fn test_mm_maskz_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_fcmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let r = _mm256_div_ph(a, b);
- let e = _mm256_set1_ph(0.5);
+ unsafe fn test_mm256_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_fcmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let src = _mm256_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0,
+ unsafe fn test_mm256_mask_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ unsafe fn test_mm256_maskz_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_ph(a, b);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
+ unsafe fn test_mm512_mask_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ unsafe fn test_mm512_maskz_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
+ unsafe fn test_mm512_mask_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_mask_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fcmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_maskz_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_fcmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_sh(a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_div_sh(src, 1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_mask_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_div_sh(0, a, b);
- let e = _mm_set_sh(0.0);
+ unsafe fn test_mm_maskz_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_div_sh(1, a, b);
- let e = _mm_set_sh(0.5);
+ }
+
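+    // `abs_ph` takes the absolute value of every `f16` lane. Note that the
+    // `_mm*_set_ph` constructors list elements from the highest lane down, while
+    // `_mm*_setr_ph` lists them in lane order; the abs tests use the `set` form.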
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_abs_ph() {
+ let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
+ let r = _mm_abs_ph(a);
+ let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mul_pch() {
+ unsafe fn test_mm256_abs_ph() {
+ let a = _mm256_set_ph(
+ -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
+ -14.0,
+ );
+ let r = _mm256_abs_ph(a);
+ let e = _mm256_set_ph(
+ 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_abs_ph() {
+ let a = _mm512_set_ph(
+ -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
+ -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
+ 27.0, -28.0, 29.0, -30.0,
+ );
+ let r = _mm512_abs_ph(a);
+ let e = _mm512_set_ph(
+ 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
+ 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
+ 29.0, 30.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
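+    // `conj_pch` negates the imaginary half of each complex pair, turning
+    // 0 + 1i into 0 - 1i; masked-off pairs again come from `src` (mask) or are
+    // zeroed (maskz).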
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_conj_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_mul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ let r = _mm_conj_pch(a);
+ let e = _mm_set1_pch(0.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_mul_pch() {
+ unsafe fn test_mm_mask_conj_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_mul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ let r = _mm_mask_conj_pch(src, 0b0101, a);
+ let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_mul_pch() {
+ unsafe fn test_mm_maskz_conj_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_mul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ let r = _mm_maskz_conj_pch(0b0101, a);
+ let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mul_pch() {
+ unsafe fn test_mm256_conj_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_mul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ let r = _mm256_conj_pch(a);
+ let e = _mm256_set1_pch(0.0, -1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_mul_pch() {
+ unsafe fn test_mm256_mask_conj_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
let src = _mm256_setr_ph(
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- );
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_mul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_mul_pch(0b01010101, a, b);
+ let r = _mm256_mask_conj_pch(src, 0b01010101, a);
let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_mul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_conj_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_conj_pch(0b01010101, a);
+ let e = _mm256_setr_ph(
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
);
- assert_eq_m512h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_round_pch() {
+ unsafe fn test_mm512_conj_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let r = _mm512_conj_pch(a);
+ let e = _mm512_set1_pch(0.0, -1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_round_pch() {
+ unsafe fn test_mm512_mask_conj_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
let src = _mm512_setr_ph(
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
32.0, 33.0,
);
- let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b0101010101010101,
- a,
- b,
- );
+ let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
+ 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_round_pch() {
+ unsafe fn test_mm512_maskz_conj_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
- a,
- b,
- );
+ let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r =
- _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_mul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_mul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_maskz_mul_sch(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
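+    // Complex fused multiply-add: with a = 0 + 1i, b = 0 + 2i and c = 0 + 3i,
+    // a * b + c = (0*0 - 1*2 + 0) + (0*2 + 1*0 + 3)i = -2 + 3i. In the masked
+    // variants the inactive pairs come from `a` (mask), from `c` (mask3), or are
+    // zeroed (maskz), as the expected vectors below spell out.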
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_fmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_fmadd_pch(a, b, c);
+ let e = _mm_set1_pch(-2.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmul_pch() {
+ unsafe fn test_mm_mask_fmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_fmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmul_pch() {
+ unsafe fn test_mm_mask3_fmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmul_pch() {
+ unsafe fn test_mm_maskz_fmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_fmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmul_pch() {
+ unsafe fn test_mm256_fmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_fmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_fmadd_pch(a, b, c);
+ let e = _mm256_set1_pch(-2.0, 3.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmul_pch() {
+ unsafe fn test_mm256_mask_fmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
+ let e = _mm256_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
);
- let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmul_pch() {
+ unsafe fn test_mm256_maskz_fmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmul_pch() {
+ unsafe fn test_mm512_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_fmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_fmadd_pch(a, b, c);
+ let e = _mm512_set1_pch(-2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmul_pch() {
+ unsafe fn test_mm512_mask_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
);
- let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmul_pch() {
+ unsafe fn test_mm512_maskz_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmul_round_pch() {
+ unsafe fn test_mm512_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r =
+ _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pch(-2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmul_round_pch() {
+ unsafe fn test_mm512_mask_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
0b0101010101010101,
+ b,
+ c,
+ );
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b0101010101010101,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmul_round_pch() {
+ unsafe fn test_mm512_maskz_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b0101010101010101,
a,
b,
+ c,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmul_round_sch() {
+ unsafe fn test_mm_fmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fmadd_sch(a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmul_round_sch() {
+ unsafe fn test_mm_mask_fmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fmadd_sch(a, 0, b, c);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_sch(a, 1, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmul_round_sch() {
+ unsafe fn test_mm_mask3_fmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r =
- _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmadd_sch(a, b, c, 0);
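+ // With the mask bit clear the low pair is taken from c; mask3 also copies the upper elements from c.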
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_sch(a, b, c, 1);
+ let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fmadd_sch(0, a, b, c);
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_sch(1, a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmul_sch() {
+ unsafe fn test_mm_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_fmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmul_sch() {
+ unsafe fn test_mm_mask_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmul_sch() {
+ unsafe fn test_mm_mask3_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_maskz_fmul_sch(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_cmul_pch() {
+ unsafe fn test_mm_fcmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_cmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_fcmadd_pch(a, b, c);
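+ // fcmadd multiplies a by the complex conjugate of b before adding c:
+ // (0 + 1i) * (0 - 2i) + (0 + 3i) = 2 + 3i.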
+ let e = _mm_set1_pch(2.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_cmul_pch() {
+ unsafe fn test_mm_mask_fcmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_cmul_pch() {
+ unsafe fn test_mm_mask3_fcmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_maskz_cmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_cmul_pch() {
+ unsafe fn test_mm_maskz_fcmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_cmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_fcmadd_pch(a, b, c);
+ let e = _mm256_set1_pch(2.0, 3.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_cmul_pch() {
+ unsafe fn test_mm256_mask_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
+ let e = _mm256_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
);
- let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fcmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_cmul_pch() {
+ unsafe fn test_mm256_maskz_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_cmul_pch() {
+ unsafe fn test_mm512_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_cmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_fcmadd_pch(a, b, c);
+ let e = _mm512_set1_pch(2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_cmul_pch() {
+ unsafe fn test_mm512_mask_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
+ 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
);
- let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fcmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
+ 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_cmul_pch() {
+ unsafe fn test_mm512_maskz_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
+ 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_cmul_round_pch() {
+ unsafe fn test_mm512_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r =
+ _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pch(2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_cmul_round_pch() {
+ unsafe fn test_mm512_mask_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
0b0101010101010101,
+ b,
+ c,
+ );
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
+ 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fcmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b0101010101010101,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
+ 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_cmul_round_pch() {
+ unsafe fn test_mm512_maskz_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b0101010101010101,
a,
b,
+ c,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
+ 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmul_sch() {
+ unsafe fn test_mm_fcmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_cmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fcmadd_sch(a, b, c);
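+ // Lowest pair only: (0 + 1i) * conj(0 + 2i) + (0 + 3i) = 2 + 3i; the upper six elements come from a.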
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmul_sch() {
+ unsafe fn test_mm_mask_fcmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_cmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fcmadd_sch(a, 0, b, c);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fcmadd_sch(a, 1, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_cmul_sch() {
+ unsafe fn test_mm_mask3_fcmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_maskz_cmul_sch(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
+ let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fcmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fcmadd_sch(0, a, b, c);
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fcmadd_sch(1, a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmul_round_sch() {
+ unsafe fn test_mm_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmul_round_sch() {
+ unsafe fn test_mm_mask_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_cmul_round_sch() {
+ unsafe fn test_mm_mask3_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r =
- _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fcmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_fcmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fmadd_ph(a, b, c);
+ let e = _mm_set1_ph(5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ unsafe fn test_mm_mask_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
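+ // _mm_set_ph lists elements from highest index to lowest, so element 0 (mask bit 0 = 1) is the
+ // trailing 5.0 = 1.0 * 2.0 + 3.0; the odd elements keep 1.0 from a.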
+ let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_maskz_fcmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ unsafe fn test_mm_mask3_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_fcmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_maskz_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmadd_ph(a, b, c);
+ let e = _mm256_set1_ph(5.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ unsafe fn test_mm256_mask_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
- let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_fcmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmadd_ph(a, b, c);
+ let e = _mm512_set1_ph(5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ unsafe fn test_mm512_mask_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
+ 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
- let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
+ 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm512_maskz_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
+ 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b0101010101010101,
+ unsafe fn test_mm512_mask_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
+ 0b01010101010101010101010101010101,
b,
+ c,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ let e = _mm512_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
+ 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_mask3_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
+ 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_fcmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fcmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_maskz_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ c,
+ );
+ let e = _mm512_set_ph(
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
+ 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_maskz_fcmul_sch(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmadd_sh(a, b, c);
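+ // Scalar f16 FMA: the low element is 1.0 * 2.0 + 3.0 = 5.0; the other seven elements are copied from a.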
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmadd_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_mask_fmadd_sh(a, 1, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r =
- _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask3_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmadd_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_abs_ph() {
- let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
- let r = _mm_abs_ph(a);
- let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
+ let r = _mm_mask3_fmadd_sh(a, b, c, 1);
+ let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_abs_ph() {
- let a = _mm256_set_ph(
- -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
- -14.0,
- );
- let r = _mm256_abs_ph(a);
- let e = _mm256_set_ph(
- 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
- );
- assert_eq_m256h(r, e);
- }
-
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_abs_ph() {
- let a = _mm512_set_ph(
- -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
- -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
- 27.0, -28.0, 29.0, -30.0,
- );
- let r = _mm512_abs_ph(a);
- let e = _mm512_set_ph(
- 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
- 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
- 29.0, 30.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let r = _mm_conj_pch(a);
- let e = _mm_set1_pch(0.0, -1.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_conj_pch(src, 0b0101, a);
- let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
+ unsafe fn test_mm_maskz_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmadd_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_conj_pch(0b0101, a);
- let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
+ let r = _mm_maskz_fmadd_sh(1, a, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_conj_pch(a);
- let e = _mm256_set1_pch(0.0, -1.0);
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- );
- let r = _mm256_mask_conj_pch(src, 0b01010101, a);
- let e = _mm256_setr_ph(
- 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_conj_pch(0b01010101, a);
- let e = _mm256_setr_ph(
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_conj_pch(a);
- let e = _mm512_set1_pch(0.0, -1.0);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ unsafe fn test_mm_mask3_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
);
- let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
- let e = _mm512_setr_ph(
- 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
- 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
- 33.0,
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
- let e = _mm512_setr_ph(
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ unsafe fn test_mm_maskz_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_fmadd_pch(a, b, c);
- let e = _mm_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fmsub_ph(a, b, c);
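+ // fmsub computes a * b - c per element: 1.0 * 2.0 - 3.0 = -1.0.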
+ let e = _mm_set1_ph(-1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
+ unsafe fn test_mm_mask_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
+ unsafe fn test_mm_mask3_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
+ unsafe fn test_mm_maskz_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_fmadd_pch(a, b, c);
- let e = _mm256_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm256_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmsub_ph(a, b, c);
+ let e = _mm256_set1_ph(-1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm256_mask_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm256_mask3_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_fmadd_pch(a, b, c);
- let e = _mm512_set1_pch(-2.0, 3.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
- );
+ unsafe fn test_mm512_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmsub_ph(a, b, c);
+ let e = _mm512_set1_ph(-1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm512_mask_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm512_mask3_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r =
- _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm512_maskz_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(-1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
b,
c,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ let e = _mm512_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask3_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ let e = _mm512_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_maskz_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
c,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fmadd_sch(a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmsub_sh(a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fmadd_sch(a, 0, b, c);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmsub_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_sch(a, 1, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_mask_fmsub_sh(a, 1, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fmadd_sch(a, b, c, 0);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ unsafe fn test_mm_mask3_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmsub_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_sch(a, b, c, 1);
- let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmsub_sh(a, b, c, 1);
+ let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fmadd_sch(0, a, b, c);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_maskz_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmsub_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_sch(1, a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_maskz_fmsub_sh(1, a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fmsub_round_sh() {
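+        // _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC is round-to-nearest-even with exceptions
+        // suppressed, so the expected values match the non-rounding fmsub_sh test above.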
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask3_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_maskz_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_fcmadd_pch(a, b, c);
- let e = _mm_set1_pch(2.0, 3.0);
+ unsafe fn test_mm_fnmadd_ph() {
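+        // fnmadd computes -(a * b) + c per lane: -(1.0 * 2.0) + 3.0 = 1.0.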
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fnmadd_ph(a, b, c);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
+ unsafe fn test_mm_mask_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
+ unsafe fn test_mm_mask3_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
+ unsafe fn test_mm_maskz_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_fcmadd_pch(a, b, c);
- let e = _mm256_set1_pch(2.0, 3.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fnmadd_ph(a, b, c);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm256_mask_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm256_mask3_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_fcmadd_pch(a, b, c);
- let e = _mm512_set1_pch(2.0, 3.0);
+ unsafe fn test_mm512_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fnmadd_ph(a, b, c);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
- 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm512_mask_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
- 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm512_mask3_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
- 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm512_maskz_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
+ unsafe fn test_mm512_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
let r =
- _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_pch(2.0, 3.0);
+ _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
b,
c,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
- 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask3_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
- 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ let e = _mm512_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_maskz_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
c,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
- 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fcmadd_sch(a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmadd_sh() {
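+        // The scalar fnmadd_sh only touches lane 0: -(1.0 * 2.0) + 3.0 = 1.0; lanes 1..7 are copied from a.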
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmadd_sh(a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fcmadd_sch(a, 0, b, c);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fnmadd_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fcmadd_sch(a, 1, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_mask_fnmadd_sh(a, 1, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ unsafe fn test_mm_mask3_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
- let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
+ let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fcmadd_sch(0, a, b, c);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_maskz_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmadd_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fcmadd_sch(1, a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_maskz_fnmadd_sh(1, a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask3_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_maskz_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmadd_ph() {
+ unsafe fn test_mm_fnmsub_ph() {
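+        // fnmsub computes -(a * b) - c per lane: -(1.0 * 2.0) - 3.0 = -5.0.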
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_fmadd_ph(a, b, c);
- let e = _mm_set1_ph(5.0);
+ let r = _mm_fnmsub_ph(a, b, c);
+ let e = _mm_set1_ph(-5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmadd_ph() {
+ unsafe fn test_mm_mask_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
+ let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmadd_ph() {
+ unsafe fn test_mm_mask3_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
+ let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmadd_ph() {
+ unsafe fn test_mm_maskz_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
+ let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmadd_ph() {
+ unsafe fn test_mm256_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmadd_ph(a, b, c);
- let e = _mm256_set1_ph(5.0);
+ let r = _mm256_fnmsub_ph(a, b, c);
+ let e = _mm256_set1_ph(-5.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmadd_ph() {
+ unsafe fn test_mm256_mask_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
let e = _mm256_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmadd_ph() {
+ unsafe fn test_mm256_mask3_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
let e = _mm256_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmadd_ph() {
+ unsafe fn test_mm256_maskz_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
let e = _mm256_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_ph() {
+ unsafe fn test_mm512_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmadd_ph(a, b, c);
- let e = _mm512_set1_ph(5.0);
+ let r = _mm512_fnmsub_ph(a, b, c);
+ let e = _mm512_set1_ph(-5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_ph() {
+ unsafe fn test_mm512_mask_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
let e = _mm512_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
- 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_ph() {
+ unsafe fn test_mm512_mask3_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
let e = _mm512_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
- 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_ph() {
+ unsafe fn test_mm512_maskz_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
let e = _mm512_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
- 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_round_ph() {
+ unsafe fn test_mm512_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(5.0);
+ let r =
+ _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(-5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_round_ph() {
+ unsafe fn test_mm512_mask_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
0b01010101010101010101010101010101,
b,
c,
);
let e = _mm512_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
- 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_round_ph() {
+ unsafe fn test_mm512_mask3_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
0b01010101010101010101010101010101,
);
let e = _mm512_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
- 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_round_ph() {
+ unsafe fn test_mm512_maskz_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
c,
);
let e = _mm512_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
- 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_sh() {
+ unsafe fn test_mm_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmadd_sh(a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_fnmsub_sh(a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_sh() {
+ unsafe fn test_mm_mask_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmadd_sh(a, 0, b, c);
+ let r = _mm_mask_fnmsub_sh(a, 0, b, c);
let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_sh(a, 1, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_mask_fnmsub_sh(a, 1, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_sh() {
+ unsafe fn test_mm_mask3_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmadd_sh(a, b, c, 0);
+ let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_sh(a, b, c, 1);
- let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
+ let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_sh() {
+ unsafe fn test_mm_maskz_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmadd_sh(0, a, b, c);
+ let r = _mm_maskz_fnmsub_sh(0, a, b, c);
let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_sh(1, a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_maskz_fnmsub_sh(1, a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_round_sh() {
+ unsafe fn test_mm_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_round_sh() {
+ unsafe fn test_mm_mask_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_round_sh() {
+ unsafe fn test_mm_mask3_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_round_sh() {
+ unsafe fn test_mm_maskz_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmsub_ph() {
+ unsafe fn test_mm_fmaddsub_ph() {
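+        // fmaddsub alternates per lane: even lanes compute a * b - c = -1.0, odd lanes a * b + c = 5.0.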
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_fmsub_ph(a, b, c);
- let e = _mm_set1_ph(-1.0);
+ let r = _mm_fmaddsub_ph(a, b, c);
+ let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmsub_ph() {
+ unsafe fn test_mm_mask_fmaddsub_ph() {
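+        // Mask bit i selects lane i, and _mm_set_ph lists lanes from highest to lowest, so with mask
+        // 0b00110011 lanes 0, 1, 4 and 5 take the fmaddsub result (-1.0 even, 5.0 odd) and the rest keep a.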
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
+ let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
+ let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmsub_ph() {
+ unsafe fn test_mm_mask3_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
+ let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
+ let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmsub_ph() {
+ unsafe fn test_mm_maskz_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
+ let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
+ let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmsub_ph() {
+ unsafe fn test_mm256_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmsub_ph(a, b, c);
- let e = _mm256_set1_ph(-1.0);
+ let r = _mm256_fmaddsub_ph(a, b, c);
+ let e = _mm256_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmsub_ph() {
+ unsafe fn test_mm256_mask_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
let e = _mm256_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmsub_ph() {
+ unsafe fn test_mm256_mask3_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
let e = _mm256_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmsub_ph() {
+ unsafe fn test_mm256_maskz_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
let e = _mm256_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsub_ph() {
+ unsafe fn test_mm512_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsub_ph(a, b, c);
- let e = _mm512_set1_ph(-1.0);
+ let r = _mm512_fmaddsub_ph(a, b, c);
+ let e = _mm512_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsub_ph() {
+ unsafe fn test_mm512_mask_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
let e = _mm512_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsub_ph() {
+ unsafe fn test_mm512_mask3_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
let e = _mm512_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsub_ph() {
+ unsafe fn test_mm512_maskz_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
let e = _mm512_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsub_round_ph() {
+ unsafe fn test_mm512_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(-1.0);
+ let r =
+ _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsub_round_ph() {
+ unsafe fn test_mm512_mask_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
b,
c,
);
let e = _mm512_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsub_round_ph() {
+ unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsub_round_ph() {
+ unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00110011001100110011001100110011,
a,
b,
c,
);
let e = _mm512_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmsub_sh(a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmsub_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fmsub_sh(a, 1, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmsub_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fmsub_sh(a, b, c, 1);
- let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmsub_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fmsub_sh(1, a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
- );
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
- );
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
- );
- let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
- );
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
- );
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fnmadd_ph() {
+ unsafe fn test_mm_fmsubadd_ph() {
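+        // fmsubadd mirrors fmaddsub: even lanes compute a * b + c = 5.0, odd lanes a * b - c = -1.0.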
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_fnmadd_ph(a, b, c);
- let e = _mm_set1_ph(1.0);
+ let r = _mm_fmsubadd_ph(a, b, c);
+ let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fnmadd_ph() {
+ unsafe fn test_mm_mask_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
+ let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fnmadd_ph() {
+ unsafe fn test_mm_mask3_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
+ let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
+ let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fnmadd_ph() {
+ unsafe fn test_mm_maskz_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
+ let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fnmadd_ph() {
+ unsafe fn test_mm256_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fnmadd_ph(a, b, c);
- let e = _mm256_set1_ph(1.0);
+ let r = _mm256_fmsubadd_ph(a, b, c);
+ let e = _mm256_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fnmadd_ph() {
+ unsafe fn test_mm256_mask_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
let e = _mm256_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fnmadd_ph() {
+ unsafe fn test_mm256_mask3_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
let e = _mm256_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fnmadd_ph() {
+ unsafe fn test_mm256_maskz_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
let e = _mm256_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmadd_ph() {
+ unsafe fn test_mm512_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fnmadd_ph(a, b, c);
- let e = _mm512_set1_ph(1.0);
+ let r = _mm512_fmsubadd_ph(a, b, c);
+ let e = _mm512_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmadd_ph() {
+ unsafe fn test_mm512_mask_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmadd_ph() {
+ unsafe fn test_mm512_mask3_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmadd_ph() {
+ unsafe fn test_mm512_maskz_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmadd_round_ph() {
+ unsafe fn test_mm512_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
let r =
- _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(1.0);
+ _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmadd_round_ph() {
+ unsafe fn test_mm512_mask_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
b,
c,
);
let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmadd_round_ph() {
+ unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmadd_round_ph() {
+ unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00110011001100110011001100110011,
a,
b,
c,
);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_rcp_ph() {
+ let a = _mm_set1_ph(2.0);
+ let r = _mm_rcp_ph(a);
+ let e = _mm_set1_ph(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_rcp_ph() {
+ let a = _mm_set1_ph(2.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_rcp_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_rcp_ph() {
+ let a = _mm_set1_ph(2.0);
+ let r = _mm_maskz_rcp_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_rcp_ph(a);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_rcp_ph(a);
+ let e = _mm512_set1_ph(0.5);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmadd_sh(a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm512_mask_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
+ 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_rcp_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_rcp_sh(a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmadd_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_mask_rcp_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_rcp_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_fnmadd_sh(a, 1, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_mask_rcp_sh(src, 1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ unsafe fn test_mm_maskz_rcp_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_rcp_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
- let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_rcp_sh(1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmadd_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_rsqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_rsqrt_ph(a);
+ let e = _mm_set1_ph(0.5);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmadd_sh(1, a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_rsqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_rsqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_maskz_rsqrt_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
assert_eq_m128h(r, e);
}
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_rsqrt_ph(a);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_rsqrt_ph(a);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
+ 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
+ unsafe fn test_mm512_maskz_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
- );
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
- );
- let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
+ unsafe fn test_mm_rsqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_rsqrt_sh(a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
- );
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_mask_rsqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_rsqrt_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_mask_rsqrt_sh(src, 1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fnmsub_ph(a, b, c);
- let e = _mm_set1_ph(-5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_rsqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_maskz_rsqrt_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
+ let r = _mm_maskz_rsqrt_sh(1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
+ unsafe fn test_mm_sqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_sqrt_ph(a);
+ let e = _mm_set1_ph(2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
+ unsafe fn test_mm_mask_sqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fnmsub_ph(a, b, c);
- let e = _mm256_set1_ph(-5.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_sqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_maskz_sqrt_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
- let e = _mm256_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- );
+ unsafe fn test_mm256_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_sqrt_ph(a);
+ let e = _mm256_set1_ph(2.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
+ unsafe fn test_mm256_mask_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
+ unsafe fn test_mm256_maskz_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fnmsub_ph(a, b, c);
- let e = _mm512_set1_ph(-5.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
- let e = _mm512_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- );
+ unsafe fn test_mm512_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_sqrt_ph(a);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ unsafe fn test_mm512_mask_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ unsafe fn test_mm512_maskz_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(-5.0);
+ unsafe fn test_mm512_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a,
+ unsafe fn test_mm512_mask_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
0b01010101010101010101010101010101,
- b,
- c,
- );
- let e = _mm512_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- b,
- c,
- 0b01010101010101010101010101010101,
);
let e = _mm512_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
- b,
- c,
);
let e = _mm512_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmsub_sh(a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmsub_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmsub_sh(a, 1, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_sqrt_sh(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ unsafe fn test_mm_mask_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_sqrt_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
- let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_sqrt_sh(src, 1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmsub_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmsub_sh(1, a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_maskz_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_maskz_sqrt_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_maskz_sqrt_sh(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
- );
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
+ unsafe fn test_mm_mask_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
+ let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
- );
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
- );
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_maskz_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r =
+ _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fmaddsub_ph(a, b, c);
- let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
+ let r =
+ _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
- let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
+ unsafe fn test_mm_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_max_ph(a, b);
+ let e = _mm_set1_ph(2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
- let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
+ unsafe fn test_mm_mask_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let src = _mm_set1_ph(3.0);
+ let r = _mm_mask_max_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
- let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
+ unsafe fn test_mm_maskz_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_maskz_max_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmaddsub_ph(a, b, c);
- let e = _mm256_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- );
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
- let e = _mm256_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- );
+ unsafe fn test_mm256_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_max_ph(a, b);
+ let e = _mm256_set1_ph(2.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
+ unsafe fn test_mm256_mask_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let src = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
+ unsafe fn test_mm256_maskz_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmaddsub_ph(a, b, c);
- let e = _mm512_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
- let e = _mm512_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
- let e = _mm512_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
- );
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_max_ph(a, b);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
+ unsafe fn test_mm512_mask_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
+ 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ unsafe fn test_mm512_maskz_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a,
- 0b00110011001100110011001100110011,
- b,
- c,
- );
- let e = _mm512_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- );
+ unsafe fn test_mm512_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
a,
b,
- c,
- 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
+ 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b00110011001100110011001100110011,
+ unsafe fn test_mm512_maskz_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
- c,
);
let e = _mm512_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fmsubadd_ph(a, b, c);
- let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_max_sh(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
- let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_max_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_max_sh(src, 1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
- let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_max_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_max_sh(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r =
+ _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
- let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
+ unsafe fn test_mm_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_min_ph(a, b);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmsubadd_ph(a, b, c);
- let e = _mm256_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_mask_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let src = _mm_set1_ph(3.0);
+ let r = _mm_mask_min_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
- let e = _mm256_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_maskz_min_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
- let e = _mm256_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
- );
+ unsafe fn test_mm256_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_min_ph(a, b);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
+ unsafe fn test_mm256_mask_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let src = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsubadd_ph(a, b, c);
- let e = _mm512_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
- assert_eq_m512h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
- let e = _mm512_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- );
+ unsafe fn test_mm512_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_min_ph(a, b);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
+ unsafe fn test_mm512_mask_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
+ unsafe fn test_mm512_maskz_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- );
+ unsafe fn test_mm512_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
a,
- 0b00110011001100110011001100110011,
b,
- c,
);
let e = _mm512_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
- c,
- 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
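+    // The `_sh` variants operate only on the lowest f16 lane; the upper seven lanes
+    // of the result are copied from `a`, which is why they match `a` in the expected values.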
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b00110011001100110011001100110011,
- a,
- b,
- c,
+ unsafe fn test_mm_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_min_sh(a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_min_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_min_sh(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_min_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_min_sh(1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let e = _mm512_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r =
+ _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
}
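+    // getexp returns the floored base-2 exponent of each element as an f16,
+    // so getexp(3.0) == 1.0 (3.0 == 1.5 * 2^1).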
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_rcp_ph() {
- let a = _mm_set1_ph(2.0);
- let r = _mm_rcp_ph(a);
- let e = _mm_set1_ph(0.5);
+ unsafe fn test_mm_getexp_ph() {
+ let a = _mm_set1_ph(3.0);
+ let r = _mm_getexp_ph(a);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_rcp_ph() {
- let a = _mm_set1_ph(2.0);
- let src = _mm_set1_ph(1.0);
- let r = _mm_mask_rcp_ph(src, 0b01010101, a);
- let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
+ unsafe fn test_mm_mask_getexp_ph() {
+ let a = _mm_set1_ph(3.0);
+ let src = _mm_set1_ph(4.0);
+ let r = _mm_mask_getexp_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_rcp_ph() {
- let a = _mm_set1_ph(2.0);
- let r = _mm_maskz_rcp_ph(0b01010101, a);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ unsafe fn test_mm_maskz_getexp_ph() {
+ let a = _mm_set1_ph(3.0);
+ let r = _mm_maskz_getexp_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_rcp_ph() {
- let a = _mm256_set1_ph(2.0);
- let r = _mm256_rcp_ph(a);
- let e = _mm256_set1_ph(0.5);
+ unsafe fn test_mm256_getexp_ph() {
+ let a = _mm256_set1_ph(3.0);
+ let r = _mm256_getexp_ph(a);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_rcp_ph() {
- let a = _mm256_set1_ph(2.0);
- let src = _mm256_set1_ph(1.0);
- let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
+ unsafe fn test_mm256_mask_getexp_ph() {
+ let a = _mm256_set1_ph(3.0);
+ let src = _mm256_set1_ph(4.0);
+ let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_rcp_ph() {
- let a = _mm256_set1_ph(2.0);
- let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
+ unsafe fn test_mm256_maskz_getexp_ph() {
+ let a = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_getexp_ph(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_rcp_ph() {
- let a = _mm512_set1_ph(2.0);
- let r = _mm512_rcp_ph(a);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_getexp_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let r = _mm512_getexp_ph(a);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_rcp_ph() {
- let a = _mm512_set1_ph(2.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_mask_getexp_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let src = _mm512_set1_ph(4.0);
+ let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
- 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
+ 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_getexp_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_getexp_round_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_getexp_round_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let src = _mm512_set1_ph(4.0);
+ let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_ph(
+ 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
+ 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_getexp_round_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_rcp_ph() {
- let a = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
- );
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_getexp_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_getexp_sh(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_getexp_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getexp_sh(src, 0, a, b);
+ let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_getexp_sh(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_getexp_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getexp_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_getexp_sh(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_rcp_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_rcp_sh(a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_getexp_round_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_rcp_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_rcp_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_getexp_round_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b);
+ let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_rcp_sh(src, 1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_rcp_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_maskz_rcp_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_getexp_round_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_rcp_sh(1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
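+    // getmant extracts the mantissa normalized to the interval chosen by the first
+    // const parameter; with _MM_MANT_NORM_P75_1P5, 10.0 == 1.25 * 2^3 yields 1.25.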
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_rsqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_rsqrt_ph(a);
- let e = _mm_set1_ph(0.5);
+ unsafe fn test_mm_getmant_ph() {
+ let a = _mm_set1_ph(10.0);
+ let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
+ let e = _mm_set1_ph(1.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_rsqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let src = _mm_set1_ph(1.0);
- let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
- let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
+ unsafe fn test_mm_mask_getmant_ph() {
+ let a = _mm_set1_ph(10.0);
+ let src = _mm_set1_ph(20.0);
+ let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a);
+ let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_rsqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_maskz_rsqrt_ph(0b01010101, a);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ unsafe fn test_mm_maskz_getmant_ph() {
+ let a = _mm_set1_ph(10.0);
+ let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a);
+ let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_rsqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_rsqrt_ph(a);
- let e = _mm256_set1_ph(0.5);
+ unsafe fn test_mm256_getmant_ph() {
+ let a = _mm256_set1_ph(10.0);
+ let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
+ let e = _mm256_set1_ph(1.25);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_rsqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let src = _mm256_set1_ph(1.0);
- let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
+ unsafe fn test_mm256_mask_getmant_ph() {
+ let a = _mm256_set1_ph(10.0);
+ let src = _mm256_set1_ph(20.0);
+ let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ src,
+ 0b0101010101010101,
+ a,
+ );
let e = _mm256_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_rsqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
+ unsafe fn test_mm256_maskz_getmant_ph() {
+ let a = _mm256_set1_ph(10.0);
+ let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ 0b0101010101010101,
+ a,
+ );
let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_rsqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_rsqrt_ph(a);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_getmant_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
+ let e = _mm512_set1_ph(1.25);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_rsqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_mask_getmant_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let src = _mm512_set1_ph(20.0);
+ let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
- 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_rsqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_maskz_getmant_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_rsqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_rsqrt_sh(a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm512_getmant_round_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r =
+ _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
+ a,
+ );
+ let e = _mm512_set1_ph(1.25);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_getmant_round_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let src = _mm512_set1_ph(20.0);
+ let r = _mm512_mask_getmant_round_ph::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_getmant_round_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r = _mm512_maskz_getmant_round_ph::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_getmant_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_rsqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_rsqrt_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_getmant_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b);
+ let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_rsqrt_sh(src, 1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_rsqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_maskz_rsqrt_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_getmant_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_rsqrt_sh(1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_getmant_round_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
+ a, b,
+ );
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_getmant_round_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(src, 0, a, b);
+ let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(src, 1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_getmant_round_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
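+    // roundscale rounds each element to IMM8[7:4] fraction bits using the rounding mode
+    // in IMM8[3:0]; IMM8 == 0 rounds to the nearest integer, so 1.1 becomes 1.0.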
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_sqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_sqrt_ph(a);
- let e = _mm_set1_ph(2.0);
+ unsafe fn test_mm_roundscale_ph() {
+ let a = _mm_set1_ph(1.1);
+ let r = _mm_roundscale_ph::<0>(a);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_sqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let src = _mm_set1_ph(1.0);
- let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
- let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
+ unsafe fn test_mm_mask_roundscale_ph() {
+ let a = _mm_set1_ph(1.1);
+ let src = _mm_set1_ph(2.0);
+ let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a);
+ let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_sqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_maskz_sqrt_ph(0b01010101, a);
- let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
+ unsafe fn test_mm_maskz_roundscale_ph() {
+ let a = _mm_set1_ph(1.1);
+ let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_sqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_sqrt_ph(a);
- let e = _mm256_set1_ph(2.0);
+ unsafe fn test_mm256_roundscale_ph() {
+ let a = _mm256_set1_ph(1.1);
+ let r = _mm256_roundscale_ph::<0>(a);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_sqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let src = _mm256_set1_ph(1.0);
- let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
+ unsafe fn test_mm256_mask_roundscale_ph() {
+ let a = _mm256_set1_ph(1.1);
+ let src = _mm256_set1_ph(2.0);
+ let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_sqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
+ unsafe fn test_mm256_maskz_roundscale_ph() {
+ let a = _mm256_set1_ph(1.1);
+ let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_sqrt_ph(a);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_roundscale_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_roundscale_ph::<0>(a);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_mask_roundscale_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
- 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_maskz_roundscale_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sqrt_round_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_roundscale_round_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sqrt_round_ph() {
- let a = _mm512_set1_ph(4.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_roundscale_round_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
src,
0b01010101010101010101010101010101,
a,
);
let e = _mm512_set_ph(
- 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
- 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sqrt_round_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_roundscale_round_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
0b01010101010101010101010101010101,
a,
);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_sqrt_sh(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_roundscale_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_roundscale_sh::<0>(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_sqrt_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_roundscale_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_sqrt_sh(src, 1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_maskz_sqrt_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_sqrt_sh(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_roundscale_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_roundscale_sh::<0>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sqrt_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_roundscale_sh::<0>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sqrt_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_roundscale_round_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_roundscale_round_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sqrt_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r =
- _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_roundscale_round_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r =
- _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
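+    // scalef computes a * 2^floor(b) per element, so scalef(1.0, 3.0) == 8.0.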
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_max_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_max_ph(a, b);
- let e = _mm_set1_ph(2.0);
+ unsafe fn test_mm_scalef_ph() {
+ let a = _mm_set1_ph(1.);
+ let b = _mm_set1_ph(3.);
+ let r = _mm_scalef_ph(a, b);
+ let e = _mm_set1_ph(8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_max_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let src = _mm_set1_ph(3.0);
- let r = _mm_mask_max_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0);
+ unsafe fn test_mm_mask_scalef_ph() {
+ let a = _mm_set1_ph(1.);
+ let b = _mm_set1_ph(3.);
+ let src = _mm_set1_ph(2.);
+ let r = _mm_mask_scalef_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_max_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_maskz_max_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
+ unsafe fn test_mm_maskz_scalef_ph() {
+ let a = _mm_set1_ph(1.);
+ let b = _mm_set1_ph(3.);
+ let r = _mm_maskz_scalef_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_max_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_max_ph(a, b);
- let e = _mm256_set1_ph(2.0);
+ unsafe fn test_mm256_scalef_ph() {
+ let a = _mm256_set1_ph(1.);
+ let b = _mm256_set1_ph(3.);
+ let r = _mm256_scalef_ph(a, b);
+ let e = _mm256_set1_ph(8.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_max_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let src = _mm256_set1_ph(3.0);
- let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b);
+ unsafe fn test_mm256_mask_scalef_ph() {
+ let a = _mm256_set1_ph(1.);
+ let b = _mm256_set1_ph(3.);
+ let src = _mm256_set1_ph(2.);
+ let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
+ 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_max_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
+ unsafe fn test_mm256_maskz_scalef_ph() {
+ let a = _mm256_set1_ph(1.);
+ let b = _mm256_set1_ph(3.);
+ let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_max_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_max_ph(a, b);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_scalef_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let r = _mm512_scalef_ph(a, b);
+ let e = _mm512_set1_ph(8.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_max_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_mask_scalef_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let src = _mm512_set1_ph(2.);
+ let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
- 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
+ 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
+ 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_max_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_maskz_scalef_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
+ 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_max_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_scalef_round_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(8.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_max_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_scalef_round_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let src = _mm512_set1_ph(2.);
+ let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
- 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
+ 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
+ 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_max_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_scalef_round_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
+ 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_max_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_max_sh(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_scalef_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_scalef_sh(a, b);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_max_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_max_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_scalef_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_scalef_sh(src, 0, a, b);
+ let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_max_sh(src, 1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_scalef_sh(src, 1, a, b);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_max_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_maskz_max_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_scalef_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_scalef_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_max_sh(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_scalef_sh(1, a, b);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_max_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_scalef_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_max_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_scalef_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 1, a, b,
);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_max_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ unsafe fn test_mm_maskz_scalef_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
let r =
- _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
let r =
- _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
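+    // reduce returns a minus a rounded to IMM8[7:4] fraction bits; with 16 (one fraction
+    // bit) and _MM_FROUND_TO_ZERO, 1.25 rounds to 1.0 and the reduced value is 0.25.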
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_min_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_min_ph(a, b);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm_reduce_ph() {
+ let a = _mm_set1_ph(1.25);
+ let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
+ let e = _mm_set1_ph(0.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_min_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let src = _mm_set1_ph(3.0);
- let r = _mm_mask_min_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
+ unsafe fn test_mm_mask_reduce_ph() {
+ let a = _mm_set1_ph(1.25);
+ let src = _mm_set1_ph(2.0);
+ let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a);
+ let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_min_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_maskz_min_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ unsafe fn test_mm_maskz_reduce_ph() {
+ let a = _mm_set1_ph(1.25);
+ let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a);
+ let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_min_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_min_ph(a, b);
- let e = _mm256_set1_ph(1.0);
+ unsafe fn test_mm256_reduce_ph() {
+ let a = _mm256_set1_ph(1.25);
+ let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
+ let e = _mm256_set1_ph(0.25);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_min_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let src = _mm256_set1_ph(3.0);
- let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b);
+ unsafe fn test_mm256_mask_reduce_ph() {
+ let a = _mm256_set1_ph(1.25);
+ let src = _mm256_set1_ph(2.0);
+ let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_min_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
+ unsafe fn test_mm256_maskz_reduce_ph() {
+ let a = _mm256_set1_ph(1.25);
+ let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_min_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_min_ph(a, b);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_reduce_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
+ let e = _mm512_set1_ph(0.25);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_min_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_mask_reduce_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_min_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_maskz_reduce_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_min_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_reduce_round_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set1_ph(0.25);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_min_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_reduce_round_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
src,
0b01010101010101010101010101010101,
a,
- b,
);
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_min_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_reduce_round_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
0b01010101010101010101010101010101,
a,
- b,
);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_min_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_min_sh(a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_reduce_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_min_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_min_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_reduce_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b);
+ let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_min_sh(src, 1, a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_min_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_maskz_min_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_reduce_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_min_sh(1, a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_min_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_reduce_round_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_min_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_reduce_round_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
src, 0, a, b,
);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
src, 1, a, b,
);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_min_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ unsafe fn test_mm_maskz_reduce_round_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
let r =
- _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
let r =
- _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
}
From d304918924a06925a242adffbcdef155fc6866e9 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Mon, 15 Jul 2024 22:43:22 +0530
Subject: [PATCH 07/11] AVX512FP16 Part 6: Remaining
`cmpph`, `fpclass`, reduce, `blend`, `permutex`
---
crates/core_arch/missing-x86.md | 35 -
crates/core_arch/src/x86/avx512fp16.rs | 1126 ++++++++++++++++++++++++
2 files changed, 1126 insertions(+), 35 deletions(-)
diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index 72fc8b840e..8fee3cd36f 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -55,7 +55,6 @@
* [ ] [`_mm256_cvtsh_h`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h)
* [ ] [`_mm256_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pch)
- * [ ] [`_mm512_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
* [ ] [`_mm512_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
* [ ] [`_mm512_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph)
* [ ] [`_mm512_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph)
@@ -102,9 +101,6 @@
* [ ] [`_mm512_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph)
* [ ] [`_mm512_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
* [ ] [`_mm512_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
- * [ ] [`_mm512_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask)
- * [ ] [`_mm512_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
- * [ ] [`_mm512_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
* [ ] [`_mm512_mask_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
* [ ] [`_mm512_mask_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph)
* [ ] [`_mm512_mask_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph)
@@ -150,7 +146,6 @@
* [ ] [`_mm512_mask_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph)
* [ ] [`_mm512_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
* [ ] [`_mm512_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
- * [ ] [`_mm512_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask)
* [ ] [`_mm512_maskz_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
* [ ] [`_mm512_maskz_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
* [ ] [`_mm512_maskz_cvt_roundepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
@@ -195,12 +190,6 @@
* [ ] [`_mm512_maskz_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
* [ ] [`_mm512_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
* [ ] [`_mm512_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
- * [ ] [`_mm512_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
- * [ ] [`_mm512_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
- * [ ] [`_mm512_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph)
- * [ ] [`_mm512_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph)
- * [ ] [`_mm512_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph)
- * [ ] [`_mm512_reduce_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph)
* [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
* [ ] [`_mm_cvt_roundi32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
* [ ] [`_mm_cvt_roundi64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
@@ -237,7 +226,6 @@
* [ ] [`_mm_cvttsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u64)
* [ ] [`_mm_cvtu32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
* [ ] [`_mm_cvtu64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh)
- * [ ] [`_mm_fpclass_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask)
* [ ] [`_mm_mask_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
* [ ] [`_mm_mask_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
* [ ] [`_mm_mask_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
@@ -246,7 +234,6 @@
* [ ] [`_mm_mask_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
* [ ] [`_mm_mask_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
* [ ] [`_mm_mask_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
- * [ ] [`_mm_mask_fpclass_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask)
* [ ] [`_mm_maskz_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
* [ ] [`_mm_maskz_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
* [ ] [`_mm_maskz_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
@@ -261,7 +248,6 @@
["AVX512_FP16", "AVX512VL"]
- * [ ] [`_mm256_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
* [ ] [`_mm256_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph)
* [ ] [`_mm256_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph)
* [ ] [`_mm256_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph)
@@ -284,9 +270,6 @@
* [ ] [`_mm256_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64)
* [ ] [`_mm256_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
* [ ] [`_mm256_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
- * [ ] [`_mm256_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
- * [ ] [`_mm256_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
- * [ ] [`_mm256_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
* [ ] [`_mm256_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
* [ ] [`_mm256_mask_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph)
* [ ] [`_mm256_mask_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph)
@@ -309,7 +292,6 @@
* [ ] [`_mm256_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64)
* [ ] [`_mm256_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
* [ ] [`_mm256_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
- * [ ] [`_mm256_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
* [ ] [`_mm256_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
* [ ] [`_mm256_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
* [ ] [`_mm256_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
@@ -332,13 +314,6 @@
* [ ] [`_mm256_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
* [ ] [`_mm256_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
* [ ] [`_mm256_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
- * [ ] [`_mm256_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
- * [ ] [`_mm256_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
- * [ ] [`_mm256_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph)
- * [ ] [`_mm256_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph)
- * [ ] [`_mm256_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph)
- * [ ] [`_mm256_reduce_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph)
- * [ ] [`_mm_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
* [ ] [`_mm_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
* [ ] [`_mm_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
* [ ] [`_mm_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph)
@@ -361,9 +336,6 @@
* [ ] [`_mm_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64)
* [ ] [`_mm_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
* [ ] [`_mm_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
- * [ ] [`_mm_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
- * [ ] [`_mm_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
- * [ ] [`_mm_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
* [ ] [`_mm_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
* [ ] [`_mm_mask_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph)
* [ ] [`_mm_mask_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph)
@@ -386,7 +358,6 @@
* [ ] [`_mm_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64)
* [ ] [`_mm_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
* [ ] [`_mm_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
- * [ ] [`_mm_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
* [ ] [`_mm_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
* [ ] [`_mm_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
* [ ] [`_mm_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
@@ -409,12 +380,6 @@
* [ ] [`_mm_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
* [ ] [`_mm_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
* [ ] [`_mm_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
- * [ ] [`_mm_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
- * [ ] [`_mm_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
- * [ ] [`_mm_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph)
- * [ ] [`_mm_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph)
- * [ ] [`_mm_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph)
- * [ ] [`_mm_reduce_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph)
diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs
index 3c04d9ae90..3889ce1f5e 100644
--- a/crates/core_arch/src/x86/avx512fp16.rs
+++ b/crates/core_arch/src/x86/avx512fp16.rs
@@ -615,6 +615,127 @@ pub unsafe fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
)
}
+macro_rules! cmp_asm {
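+    // Unmasked form: `vcmpph` compares every lane and writes the full result mask to a k register.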
+ ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{
+ let dst: $mask_type;
+ crate::arch::asm!(
+ "vcmpph {k}, {a}, {b}, {imm8}",
+ k = lateout(kreg) dst,
+ a = in($reg) $a,
+ b = in($reg) $b,
+ imm8 = const IMM5,
+ options(pure, nomem, nostack)
+ );
+ dst
+ }};
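+    // Masked form: the comparison is written through the supplied mask register, so lanes whose mask bit is clear come out as 0.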
+ ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{
+ let dst: $mask_type;
+ crate::arch::asm!(
+ "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}",
+ k = lateout(kreg) dst,
+ mask = in(kreg) $mask,
+ a = in($reg) $a,
+ b = in($reg) $b,
+ imm8 = const IMM5,
+ options(pure, nomem, nostack)
+ );
+ dst
+ }};
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl,avx512f,sse")]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cmp_ph_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
+ static_assert_uimm_bits!(IMM5, 5);
+ cmp_asm!(__mmask8, xmm_reg, a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl,avx512f,sse")]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cmp_ph_mask<const IMM5: i32>(
+ k1: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __mmask8 {
+ static_assert_uimm_bits!(IMM5, 5);
+ cmp_asm!(__mmask8, k1, xmm_reg, a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl,avx512f,avx")]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16 {
+ static_assert_uimm_bits!(IMM5, 5);
+ cmp_asm!(__mmask16, ymm_reg, a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl,avx512f,avx")]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cmp_ph_mask<const IMM5: i32>(
+ k1: __mmask16,
+ a: __m256h,
+ b: __m256h,
+) -> __mmask16 {
+ static_assert_uimm_bits!(IMM5, 5);
+ cmp_asm!(__mmask16, k1, ymm_reg, a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the results in mask vector k.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512bw,avx512f")]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32 {
+ static_assert_uimm_bits!(IMM5, 5);
+ cmp_asm!(__mmask32, zmm_reg, a, b)
+}
+
+/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
+/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
+/// zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512bw,avx512f")]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cmp_ph_mask<const IMM5: i32>(
+ k1: __mmask32,
+ a: __m512h,
+ b: __m512h,
+) -> __mmask32 {
+ static_assert_uimm_bits!(IMM5, 5);
+ cmp_asm!(__mmask32, k1, zmm_reg, a, b)
+}
+
/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k. Exceptions can be suppressed by
/// passing _MM_FROUND_NO_EXC in the sae parameter.
@@ -10639,6 +10760,520 @@ pub unsafe fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>(
     _mm_mask_reduce_round_sh::<IMM8, SAE>(_mm_setzero_ph(), k, a, b)
}
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
+/// sum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_reduce_add_ph(a: __m128h) -> f16 {
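+    // Tree reduction: fold the upper half onto the lower half, then adjacent pairs, then add the two remaining lanes.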
+ let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
+ let a = _mm_add_ph(a, b);
+ let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
+ let a = _mm_add_ph(a, b);
+ simd_extract::<_, f16>(a, 0) + simd_extract::<_, f16>(a, 1)
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
+/// sum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_reduce_add_ph(a: __m256h) -> f16 {
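+    // Extract the low and high 128-bit halves, add them, and reduce the 128-bit sum.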
+ let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+ let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ _mm_reduce_add_ph(_mm_add_ph(p, q))
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
+/// sum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_reduce_add_ph(a: __m512h) -> f16 {
+ let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+ let q = simd_shuffle!(
+ a,
+ a,
+ [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+ );
+ _mm256_reduce_add_ph(_mm256_add_ph(p, q))
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
+/// the product of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_reduce_mul_ph(a: __m128h) -> f16 {
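+    // Same tree reduction as `_mm_reduce_add_ph`, but folding with multiplication.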
+ let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
+ let a = _mm_mul_ph(a, b);
+ let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
+ let a = _mm_mul_ph(a, b);
+ simd_extract::<_, f16>(a, 0) * simd_extract::<_, f16>(a, 1)
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
+/// the product of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_reduce_mul_ph(a: __m256h) -> f16 {
+ let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+ let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ _mm_reduce_mul_ph(_mm_mul_ph(p, q))
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
+/// the product of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_reduce_mul_ph(a: __m512h) -> f16 {
+ let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+ let q = simd_shuffle!(
+ a,
+ a,
+ [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+ );
+ _mm256_reduce_mul_ph(_mm256_mul_ph(p, q))
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
+/// minimum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_reduce_min_ph(a: __m128h) -> f16 {
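+    // Tree reduction: fold halves, then adjacent pairs, then take the minimum of the two remaining lanes with `_mm_min_sh`.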
+ let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
+ let a = _mm_min_ph(a, b);
+ let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
+ let a = _mm_min_ph(a, b);
+ let b = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
+ simd_extract!(_mm_min_sh(a, b), 0)
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
+/// minimum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_reduce_min_ph(a: __m256h) -> f16 {
+ let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+ let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ _mm_reduce_min_ph(_mm_min_ph(p, q))
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
+/// minimum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_reduce_min_ph(a: __m512h) -> f16 {
+ let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+ let q = simd_shuffle!(
+ a,
+ a,
+ [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+ );
+ _mm256_reduce_min_ph(_mm256_min_ph(p, q))
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
+/// maximum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_reduce_max_ph(a: __m128h) -> f16 {
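+    // Tree reduction: fold halves, then adjacent pairs, then take the maximum of the two remaining lanes with `_mm_max_sh`.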
+ let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
+ let a = _mm_max_ph(a, b);
+ let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
+ let a = _mm_max_ph(a, b);
+ let b = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
+ simd_extract!(_mm_max_sh(a, b), 0)
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
+/// maximum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_reduce_max_ph(a: __m256h) -> f16 {
+ let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+ let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ _mm_reduce_max_ph(_mm_max_ph(p, q))
+}
+
+/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
+/// maximum of all elements in a.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_reduce_max_ph(a: __m512h) -> f16 {
+ let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+ let q = simd_shuffle!(
+ a,
+ a,
+ [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+ );
+ _mm256_reduce_max_ph(_mm256_max_ph(p, q))
+}
+
+macro_rules! fpclass_asm {
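+    // Unmasked form: `vfpclassph` classifies every lane and writes the full result mask to a k register.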
+ ($mask_type: ty, $reg: ident, $a: expr) => {{
+ let dst: $mask_type;
+ crate::arch::asm!(
+ "vfpclassph {k}, {src}, {imm8}",
+ k = lateout(kreg) dst,
+ src = in($reg) $a,
+ imm8 = const IMM8,
+ options(pure, nomem, nostack)
+ );
+ dst
+ }};
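+    // Masked form: classification results are written through the supplied mask, so lanes with a clear mask bit yield 0.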
+ ($mask_type: ty, $mask: expr, $reg: ident, $a: expr) => {{
+ let dst: $mask_type;
+ crate::arch::asm!(
+ "vfpclassph {k} {{ {mask} }}, {src}, {imm8}",
+ k = lateout(kreg) dst,
+ mask = in(kreg) $mask,
+ src = in($reg) $a,
+ imm8 = const IMM8,
+ options(pure, nomem, nostack)
+ );
+ dst
+ }};
+}
+
+/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
+/// by imm8, and store the results in mask vector k.
+/// imm can be a combination of:
+///
+/// 0x01 // QNaN
+/// 0x02 // Positive Zero
+/// 0x04 // Negative Zero
+/// 0x08 // Positive Infinity
+/// 0x10 // Negative Infinity
+/// 0x20 // Denormal
+/// 0x40 // Negative
+/// 0x80 // SNaN
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl,avx512f,sse")]
+#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fpclass_ph_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
+ static_assert_uimm_bits!(IMM8, 8);
+ fpclass_asm!(__mmask8, xmm_reg, a)
+}
+
+/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
+/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
+/// corresponding mask bit is not set).
+/// imm can be a combination of:
+///
+/// 0x01 // QNaN
+/// 0x02 // Positive Zero
+/// 0x04 // Negative Zero
+/// 0x08 // Positive Infinity
+/// 0x10 // Negative Infinity
+/// 0x20 // Denormal
+/// 0x40 // Negative
+/// 0x80 // SNaN
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl,avx512f,sse")]
+#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
+ static_assert_uimm_bits!(IMM8, 8);
+ fpclass_asm!(__mmask8, k1, xmm_reg, a)
+}
+
+/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
+/// by imm8, and store the results in mask vector k.
+/// imm can be a combination of:
+///
+/// 0x01 // QNaN
+/// 0x02 // Positive Zero
+/// 0x04 // Negative Zero
+/// 0x08 // Positive Infinity
+/// 0x10 // Negative Infinity
+/// 0x20 // Denormal
+/// 0x40 // Negative
+/// 0x80 // SNaN
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl,avx512f,avx")]
+#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_fpclass_ph_mask<const IMM8: i32>(a: __m256h) -> __mmask16 {
+ static_assert_uimm_bits!(IMM8, 8);
+ fpclass_asm!(__mmask16, ymm_reg, a)
+}
+
+/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
+/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
+/// corresponding mask bit is not set).
+/// imm can be a combination of:
+///
+/// 0x01 // QNaN
+/// 0x02 // Positive Zero
+/// 0x04 // Negative Zero
+/// 0x08 // Positive Infinity
+/// 0x10 // Negative Infinity
+/// 0x20 // Denormal
+/// 0x40 // Negative
+/// 0x80 // SNaN
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl,avx512f,avx")]
+#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask16, a: __m256h) -> __mmask16 {
+ static_assert_uimm_bits!(IMM8, 8);
+ fpclass_asm!(__mmask16, k1, ymm_reg, a)
+}
+
+/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
+/// by imm8, and store the results in mask vector k.
+/// imm can be a combination of:
+///
+/// 0x01 // QNaN
+/// 0x02 // Positive Zero
+/// 0x04 // Negative Zero
+/// 0x08 // Positive Infinity
+/// 0x10 // Negative Infinity
+/// 0x20 // Denormal
+/// 0x40 // Negative
+/// 0x80 // SNaN
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512bw,avx512f")]
+#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_fpclass_ph_mask<const IMM8: i32>(a: __m512h) -> __mmask32 {
+ static_assert_uimm_bits!(IMM8, 8);
+ fpclass_asm!(__mmask32, zmm_reg, a)
+}
+
+/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
+/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
+/// corresponding mask bit is not set).
+/// imm can be a combination of:
+///
+/// 0x01 // QNaN
+/// 0x02 // Positive Zero
+/// 0x04 // Negative Zero
+/// 0x08 // Positive Infinity
+/// 0x10 // Negative Infinity
+/// 0x20 // Denormal
+/// 0x40 // Negative
+/// 0x80 // SNaN
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512bw,avx512f")]
+#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask32, a: __m512h) -> __mmask32 {
+ static_assert_uimm_bits!(IMM8, 8);
+ fpclass_asm!(__mmask32, k1, zmm_reg, a)
+}
+
+/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
+/// by imm8, and store the result in mask vector k.
+/// imm can be a combination of:
+///
+/// 0x01 // QNaN
+/// 0x02 // Positive Zero
+/// 0x04 // Negative Zero
+/// 0x08 // Positive Infinity
+/// 0x10 // Negative Infinity
+/// 0x20 // Denormal
+/// 0x40 // Negative
+/// 0x80 // SNaN
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_fpclass_sh_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
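+    // Delegate to the masked form with an all-ones mask so the classification bit is never zeroed.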
+    _mm_mask_fpclass_sh_mask::<IMM8>(0xff, a)
+}
+
+/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
+/// by imm8, and store the result in mask vector k using zeromask k1 (elements are zeroed out when the
+/// corresponding mask bit is not set).
+/// imm can be a combination of:
+///
+/// 0x01 // QNaN
+/// 0x02 // Positive Zero
+/// 0x04 // Negative Zero
+/// 0x08 // Positive Infinity
+/// 0x10 // Negative Infinity
+/// 0x20 // Denormal
+/// 0x40 // Negative
+/// 0x80 // SNaN
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_fpclass_sh_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
+ static_assert_uimm_bits!(IMM8, 8);
+ vfpclasssh(a, IMM8, k1)
+}
+
+/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
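+    // A set bit in `k` selects the lane from `b`; a clear bit keeps the lane from `a`.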
+ simd_select_bitmask(k, b, a)
+}
+
+/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ simd_select_bitmask(k, b, a)
+}
+
+/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ simd_select_bitmask(k, b, a)
+}
+
+/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
+/// and index in idx, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h {
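+    // Permuting f16 lanes only moves bit patterns, so reuse the 16-bit integer permute through casts.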
+ _mm_castsi128_ph(_mm_permutex2var_epi16(
+ _mm_castph_si128(a),
+ idx,
+ _mm_castph_si128(b),
+ ))
+}
+
+/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
+/// and index in idx, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h {
+ _mm256_castsi256_ph(_mm256_permutex2var_epi16(
+ _mm256_castph_si256(a),
+ idx,
+ _mm256_castph_si256(b),
+ ))
+}
+
+/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
+/// and index in idx, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h {
+ _mm512_castsi512_ph(_mm512_permutex2var_epi16(
+ _mm512_castph_si512(a),
+ idx,
+ _mm512_castph_si512(b),
+ ))
+}
+
+/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h {
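+    // As with `permutex2var`, lane permutation is type-agnostic, so reuse the 16-bit integer permute.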
+ _mm_castsi128_ph(_mm_permutexvar_epi16(idx, _mm_castph_si128(a)))
+}
+
+/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h {
+ _mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, _mm256_castph_si256(a)))
+}
+
+/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h {
+ _mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, _mm512_castph_si512(a)))
+}
+
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
@@ -10832,6 +11467,9 @@ extern "C" {
#[link_name = "llvm.x86.avx512fp16.mask.reduce.sh"]
fn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
-> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh"]
+ fn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8;
}
#[cfg(test)]
@@ -11216,6 +11854,80 @@ mod tests {
assert_eq_m512h(r, e);
}
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cmp_ph_mask() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
+ let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 0b11110000);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cmp_ph_mask() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
+ let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b);
+ assert_eq!(r, 0b01010000);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cmp_ph_mask() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
+ -16.0,
+ );
+ let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 0b1111000011110000);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cmp_ph_mask() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
+ -16.0,
+ );
+ let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b0101010101010101, a, b);
+ assert_eq!(r, 0b0101000001010000);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cmp_ph_mask() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
+ -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
+ -29.0, -30.0, -31.0, -32.0,
+ );
+ let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 0b11110000111100001111000011110000);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cmp_ph_mask() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
+ -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
+ -29.0, -30.0, -31.0, -32.0,
+ );
+ let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101010101010101010101010101, a, b);
+ assert_eq!(r, 0b01010000010100000101000001010000);
+ }
+
#[simd_test(enable = "avx512fp16")]
unsafe fn test_mm_cmp_round_sh_mask() {
let a = _mm_set_sh(1.0);
@@ -17754,4 +18466,418 @@ mod tests {
let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_reduce_add_ph() {
+ let a = _mm_set1_ph(2.0);
+ let r = _mm_reduce_add_ph(a);
+ assert_eq!(r, 16.0);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_reduce_add_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_reduce_add_ph(a);
+ assert_eq!(r, 32.0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_reduce_add_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_reduce_add_ph(a);
+ assert_eq!(r, 64.0);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_reduce_mul_ph() {
+ let a = _mm_set1_ph(2.0);
+ let r = _mm_reduce_mul_ph(a);
+ assert_eq!(r, 256.0);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_reduce_mul_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_reduce_mul_ph(a);
+ assert_eq!(r, 65536.0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_reduce_mul_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_reduce_mul_ph(a);
+ assert_eq!(r, 16777216.0);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_reduce_max_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_reduce_max_ph(a);
+ assert_eq!(r, 8.0);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_reduce_max_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm256_reduce_max_ph(a);
+ assert_eq!(r, 16.0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_reduce_max_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_reduce_max_ph(a);
+ assert_eq!(r, 32.0);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_reduce_min_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_reduce_min_ph(a);
+ assert_eq!(r, 1.0);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_reduce_min_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm256_reduce_min_ph(a);
+ assert_eq!(r, 1.0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_reduce_min_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_reduce_min_ph(a);
+ assert_eq!(r, 1.0);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_fpclass_ph_mask() {
+ let a = _mm_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ );
+ let r = _mm_fpclass_ph_mask::<0x18>(a); // infinities
+ assert_eq!(r, 0b01100000);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_fpclass_ph_mask() {
+ let a = _mm_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ );
+ let r = _mm_mask_fpclass_ph_mask::<0x18>(0b01010101, a);
+ assert_eq!(r, 0b01000000);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fpclass_ph_mask() {
+ let a = _mm256_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ );
+ let r = _mm256_fpclass_ph_mask::<0x18>(a); // infinities
+ assert_eq!(r, 0b0110000001100000);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_fpclass_ph_mask() {
+ let a = _mm256_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ );
+ let r = _mm256_mask_fpclass_ph_mask::<0x18>(0b0101010101010101, a);
+ assert_eq!(r, 0b0100000001000000);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_fpclass_ph_mask() {
+ let a = _mm512_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ );
+ let r = _mm512_fpclass_ph_mask::<0x18>(a); // infinities
+ assert_eq!(r, 0b01100000011000000110000001100000);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fpclass_ph_mask() {
+ let a = _mm512_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ );
+ let r = _mm512_mask_fpclass_ph_mask::<0x18>(0b01010101010101010101010101010101, a);
+ assert_eq!(r, 0b01000000010000000100000001000000);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_fpclass_sh_mask() {
+ let a = _mm_set_sh(f16::INFINITY);
+ let r = _mm_fpclass_sh_mask::<0x18>(a);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_fpclass_sh_mask() {
+ let a = _mm_set_sh(f16::INFINITY);
+ let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a);
+ assert_eq!(r, 0);
+ let r = _mm_mask_fpclass_sh_mask::<0x18>(1, a);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_blend_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0);
+ let r = _mm_mask_blend_ph(0b01010101, a, b);
+ let e = _mm_set_ph(1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_blend_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
+ -14.0, -15.0, -16.0,
+ );
+ let r = _mm256_mask_blend_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
+ -16.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_blend_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
+ -14.0, -15.0, -16.0, -17.0, -18.0, -19.0, -20.0, -21.0, -22.0, -23.0, -24.0, -25.0,
+ -26.0, -27.0, -28.0, -29.0, -30.0, -31.0, -32.0,
+ );
+ let r = _mm512_mask_blend_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
+ -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, 27.0, -28.0,
+ 29.0, -30.0, 31.0, -32.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_permutex2var_ph() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14);
+ let r = _mm_permutex2var_ph(a, idx, b);
+ let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_permutex2var_ph() {
+ let a = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_setr_ph(
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let idx = _mm256_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let r = _mm256_permutex2var_ph(a, idx, b);
+ let e = _mm256_setr_ph(
+ 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
+ 31.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_permutex2var_ph() {
+ let a = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_setr_ph(
+ 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0,
+ 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
+ 61.0, 62.0, 63.0, 64.0,
+ );
+ let idx = _mm512_set_epi16(
+ 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20,
+ 18, 16, 14, 12, 10, 8, 6, 4, 2, 0,
+ );
+ let r = _mm512_permutex2var_ph(a, idx, b);
+ let e = _mm512_setr_ph(
+ 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
+ 31.0, 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0, 49.0, 51.0, 53.0, 55.0, 57.0,
+ 59.0, 61.0, 63.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_permutexvar_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7);
+ let r = _mm_permutexvar_ph(idx, a);
+ let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_permutexvar_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let idx = _mm256_set_epi16(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
+ let r = _mm256_permutexvar_ph(idx, a);
+ let e = _mm256_setr_ph(
+ 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_permutexvar_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let idx = _mm512_set_epi16(
+ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31,
+ );
+ let r = _mm512_permutexvar_ph(idx, a);
+ let e = _mm512_setr_ph(
+ 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
+ 31.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0,
+ 30.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
+ }
}
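
(Editorial note, not part of either patch: a minimal usage sketch of the lane-select intrinsics exercised by the tests above. It assumes a nightly toolchain with the `f16` and `stdarch_x86_avx512_f16` features enabled and a CPU supporting `avx512fp16` and `avx512vl`.)

    use core::arch::x86_64::*;

    // Blend two half-precision vectors: lanes whose mask bit is set take their
    // value from `b`, the remaining lanes keep the value from `a`.
    #[target_feature(enable = "avx512fp16,avx512vl")]
    unsafe fn blend_lanes(mask: __mmask8, a: __m128h, b: __m128h) -> __m128h {
        _mm_mask_blend_ph(mask, a, b)
    }

    // Permute lanes of `a` by index: lane i of the result is a[idx[i] & 7],
    // mirroring the test_mm_permutexvar_ph expectations above.
    #[target_feature(enable = "avx512fp16,avx512vl")]
    unsafe fn shuffle_lanes(idx: __m128i, a: __m128h) -> __m128h {
        _mm_permutexvar_ph(idx, a)
    }
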
From 2ae57f04a5dfab907e9dfc938de00c4e6bdf3619 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Tue, 16 Jul 2024 12:37:21 +0530
Subject: [PATCH 08/11] AVX512FP16 Part 7: Convert to f16
---
crates/core_arch/missing-x86.md | 116 -
crates/core_arch/src/x86/avx512fp16.rs | 12052 ++++++++++++--------
crates/core_arch/src/x86_64/avx512fp16.rs | 129 +
crates/core_arch/src/x86_64/mod.rs | 4 +
4 files changed, 7689 insertions(+), 4612 deletions(-)
create mode 100644 crates/core_arch/src/x86_64/avx512fp16.rs
diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index 8fee3cd36f..1c2d0a6d7b 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -56,13 +56,6 @@
* [ ] [`_mm256_cvtsh_h`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h)
* [ ] [`_mm256_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pch)
* [ ] [`_mm512_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
- * [ ] [`_mm512_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph)
- * [ ] [`_mm512_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph)
- * [ ] [`_mm512_cvt_roundepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph)
- * [ ] [`_mm512_cvt_roundepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu16_ph)
- * [ ] [`_mm512_cvt_roundepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ph)
- * [ ] [`_mm512_cvt_roundepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu64_ph)
- * [ ] [`_mm512_cvt_roundpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ph)
* [ ] [`_mm512_cvt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16)
* [ ] [`_mm512_cvt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32)
* [ ] [`_mm512_cvt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64)
@@ -70,13 +63,6 @@
* [ ] [`_mm512_cvt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32)
* [ ] [`_mm512_cvt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64)
* [ ] [`_mm512_cvt_roundph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd)
- * [ ] [`_mm512_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_ph)
- * [ ] [`_mm512_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ph)
- * [ ] [`_mm512_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_ph)
- * [ ] [`_mm512_cvtepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_ph)
- * [ ] [`_mm512_cvtepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ph)
- * [ ] [`_mm512_cvtepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu64_ph)
- * [ ] [`_mm512_cvtpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ph)
* [ ] [`_mm512_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16)
* [ ] [`_mm512_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32)
* [ ] [`_mm512_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64)
@@ -98,17 +84,8 @@
* [ ] [`_mm512_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32)
* [ ] [`_mm512_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64)
* [ ] [`_mm512_cvtx_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps)
- * [ ] [`_mm512_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph)
* [ ] [`_mm512_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
- * [ ] [`_mm512_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
* [ ] [`_mm512_mask_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
- * [ ] [`_mm512_mask_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph)
- * [ ] [`_mm512_mask_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph)
- * [ ] [`_mm512_mask_cvt_roundepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph)
- * [ ] [`_mm512_mask_cvt_roundepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu16_ph)
- * [ ] [`_mm512_mask_cvt_roundepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ph)
- * [ ] [`_mm512_mask_cvt_roundepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu64_ph)
- * [ ] [`_mm512_mask_cvt_roundpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ph)
* [ ] [`_mm512_mask_cvt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16)
* [ ] [`_mm512_mask_cvt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32)
* [ ] [`_mm512_mask_cvt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64)
@@ -116,13 +93,6 @@
* [ ] [`_mm512_mask_cvt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32)
* [ ] [`_mm512_mask_cvt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64)
* [ ] [`_mm512_mask_cvt_roundph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd)
- * [ ] [`_mm512_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_ph)
- * [ ] [`_mm512_mask_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ph)
- * [ ] [`_mm512_mask_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_ph)
- * [ ] [`_mm512_mask_cvtepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_ph)
- * [ ] [`_mm512_mask_cvtepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ph)
- * [ ] [`_mm512_mask_cvtepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu64_ph)
- * [ ] [`_mm512_mask_cvtpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ph)
* [ ] [`_mm512_mask_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16)
* [ ] [`_mm512_mask_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32)
* [ ] [`_mm512_mask_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64)
@@ -143,16 +113,7 @@
* [ ] [`_mm512_mask_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32)
* [ ] [`_mm512_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64)
* [ ] [`_mm512_mask_cvtx_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps)
- * [ ] [`_mm512_mask_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph)
* [ ] [`_mm512_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
- * [ ] [`_mm512_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
- * [ ] [`_mm512_maskz_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
- * [ ] [`_mm512_maskz_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
- * [ ] [`_mm512_maskz_cvt_roundepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
- * [ ] [`_mm512_maskz_cvt_roundepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu16_ph)
- * [ ] [`_mm512_maskz_cvt_roundepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ph)
- * [ ] [`_mm512_maskz_cvt_roundepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu64_ph)
- * [ ] [`_mm512_maskz_cvt_roundpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ph)
* [ ] [`_mm512_maskz_cvt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16)
* [ ] [`_mm512_maskz_cvt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32)
* [ ] [`_mm512_maskz_cvt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64)
@@ -160,13 +121,6 @@
* [ ] [`_mm512_maskz_cvt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32)
* [ ] [`_mm512_maskz_cvt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64)
* [ ] [`_mm512_maskz_cvt_roundph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd)
- * [ ] [`_mm512_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_ph)
- * [ ] [`_mm512_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ph)
- * [ ] [`_mm512_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_ph)
- * [ ] [`_mm512_maskz_cvtepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_ph)
- * [ ] [`_mm512_maskz_cvtepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ph)
- * [ ] [`_mm512_maskz_cvtepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu64_ph)
- * [ ] [`_mm512_maskz_cvtpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ph)
* [ ] [`_mm512_maskz_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16)
* [ ] [`_mm512_maskz_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32)
* [ ] [`_mm512_maskz_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64)
@@ -187,25 +141,14 @@
* [ ] [`_mm512_maskz_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32)
* [ ] [`_mm512_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64)
* [ ] [`_mm512_maskz_cvtx_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps)
- * [ ] [`_mm512_maskz_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
* [ ] [`_mm512_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
- * [ ] [`_mm512_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
* [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
- * [ ] [`_mm_cvt_roundi32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
- * [ ] [`_mm_cvt_roundi64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
- * [ ] [`_mm_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
* [ ] [`_mm_cvt_roundsh_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32)
* [ ] [`_mm_cvt_roundsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i64)
* [ ] [`_mm_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd)
* [ ] [`_mm_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss)
* [ ] [`_mm_cvt_roundsh_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32)
* [ ] [`_mm_cvt_roundsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u64)
- * [ ] [`_mm_cvt_roundss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sh)
- * [ ] [`_mm_cvt_roundu32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_sh)
- * [ ] [`_mm_cvt_roundu64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sh)
- * [ ] [`_mm_cvti32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sh)
- * [ ] [`_mm_cvti64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sh)
- * [ ] [`_mm_cvtsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_sh)
* [ ] [`_mm_cvtsh_h`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h)
* [ ] [`_mm_cvtsh_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32)
* [ ] [`_mm_cvtsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i64)
@@ -215,7 +158,6 @@
* [ ] [`_mm_cvtsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u64)
* [ ] [`_mm_cvtsi128_si16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16)
* [ ] [`_mm_cvtsi16_si128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128)
- * [ ] [`_mm_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sh)
* [ ] [`_mm_cvtt_roundsh_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32)
* [ ] [`_mm_cvtt_roundsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i64)
* [ ] [`_mm_cvtt_roundsh_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32)
@@ -224,37 +166,20 @@
* [ ] [`_mm_cvttsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i64)
* [ ] [`_mm_cvttsh_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32)
* [ ] [`_mm_cvttsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u64)
- * [ ] [`_mm_cvtu32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
- * [ ] [`_mm_cvtu64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh)
- * [ ] [`_mm_mask_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
* [ ] [`_mm_mask_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
* [ ] [`_mm_mask_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
- * [ ] [`_mm_mask_cvt_roundss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sh)
- * [ ] [`_mm_mask_cvtsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_sh)
* [ ] [`_mm_mask_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
* [ ] [`_mm_mask_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
- * [ ] [`_mm_mask_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
- * [ ] [`_mm_maskz_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
* [ ] [`_mm_maskz_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
* [ ] [`_mm_maskz_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
- * [ ] [`_mm_maskz_cvt_roundss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sh)
- * [ ] [`_mm_maskz_cvtsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_sh)
* [ ] [`_mm_maskz_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
* [ ] [`_mm_maskz_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
- * [ ] [`_mm_maskz_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
* [ ] [`_mm_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pch)
["AVX512_FP16", "AVX512VL"]
- * [ ] [`_mm256_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph)
- * [ ] [`_mm256_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph)
- * [ ] [`_mm256_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph)
- * [ ] [`_mm256_cvtepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_ph)
- * [ ] [`_mm256_cvtepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_ph)
- * [ ] [`_mm256_cvtepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu64_ph)
- * [ ] [`_mm256_cvtpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ph)
* [ ] [`_mm256_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16)
* [ ] [`_mm256_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32)
* [ ] [`_mm256_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64)
@@ -269,14 +194,6 @@
* [ ] [`_mm256_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32)
* [ ] [`_mm256_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64)
* [ ] [`_mm256_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
- * [ ] [`_mm256_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
- * [ ] [`_mm256_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
- * [ ] [`_mm256_mask_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph)
- * [ ] [`_mm256_mask_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph)
- * [ ] [`_mm256_mask_cvtepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_ph)
- * [ ] [`_mm256_mask_cvtepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_ph)
- * [ ] [`_mm256_mask_cvtepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu64_ph)
- * [ ] [`_mm256_mask_cvtpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ph)
* [ ] [`_mm256_mask_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16)
* [ ] [`_mm256_mask_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32)
* [ ] [`_mm256_mask_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64)
@@ -291,14 +208,6 @@
* [ ] [`_mm256_mask_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32)
* [ ] [`_mm256_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64)
* [ ] [`_mm256_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
- * [ ] [`_mm256_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
- * [ ] [`_mm256_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
- * [ ] [`_mm256_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
- * [ ] [`_mm256_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
- * [ ] [`_mm256_maskz_cvtepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_ph)
- * [ ] [`_mm256_maskz_cvtepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_ph)
- * [ ] [`_mm256_maskz_cvtepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu64_ph)
- * [ ] [`_mm256_maskz_cvtpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ph)
* [ ] [`_mm256_maskz_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16)
* [ ] [`_mm256_maskz_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32)
* [ ] [`_mm256_maskz_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64)
@@ -313,14 +222,6 @@
* [ ] [`_mm256_maskz_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32)
* [ ] [`_mm256_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
* [ ] [`_mm256_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
- * [ ] [`_mm256_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
- * [ ] [`_mm_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
- * [ ] [`_mm_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
- * [ ] [`_mm_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph)
- * [ ] [`_mm_cvtepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_ph)
- * [ ] [`_mm_cvtepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_ph)
- * [ ] [`_mm_cvtepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu64_ph)
- * [ ] [`_mm_cvtpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ph)
* [ ] [`_mm_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16)
* [ ] [`_mm_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32)
* [ ] [`_mm_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64)
@@ -335,14 +236,6 @@
* [ ] [`_mm_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32)
* [ ] [`_mm_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64)
* [ ] [`_mm_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
- * [ ] [`_mm_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
- * [ ] [`_mm_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
- * [ ] [`_mm_mask_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph)
- * [ ] [`_mm_mask_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph)
- * [ ] [`_mm_mask_cvtepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_ph)
- * [ ] [`_mm_mask_cvtepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_ph)
- * [ ] [`_mm_mask_cvtepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu64_ph)
- * [ ] [`_mm_mask_cvtpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ph)
* [ ] [`_mm_mask_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16)
* [ ] [`_mm_mask_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32)
* [ ] [`_mm_mask_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64)
@@ -357,14 +250,6 @@
* [ ] [`_mm_mask_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32)
* [ ] [`_mm_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64)
* [ ] [`_mm_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
- * [ ] [`_mm_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
- * [ ] [`_mm_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
- * [ ] [`_mm_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
- * [ ] [`_mm_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
- * [ ] [`_mm_maskz_cvtepu16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_ph)
- * [ ] [`_mm_maskz_cvtepu32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_ph)
- * [ ] [`_mm_maskz_cvtepu64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu64_ph)
- * [ ] [`_mm_maskz_cvtpd_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ph)
* [ ] [`_mm_maskz_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16)
* [ ] [`_mm_maskz_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32)
* [ ] [`_mm_maskz_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64)
@@ -379,7 +264,6 @@
* [ ] [`_mm_maskz_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32)
* [ ] [`_mm_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
* [ ] [`_mm_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
- * [ ] [`_mm_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs
index 3889ce1f5e..be99002e51 100644
--- a/crates/core_arch/src/x86/avx512fp16.rs
+++ b/crates/core_arch/src/x86/avx512fp16.rs
@@ -11274,6 +11274,1811 @@ pub unsafe fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h {
_mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, _mm512_castph_si512(a)))
}
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtepi16_ph(a: __m128i) -> __m128h {
+ vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
+ simd_select_bitmask(k, _mm_cvtepi16_ph(a), src)
+}
+
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h {
+ _mm_mask_cvtepi16_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h {
+ vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
+ simd_select_bitmask(k, _mm256_cvtepi16_ph(a), src)
+}
+
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h {
+ _mm256_mask_cvtepi16_ph(_mm256_setzero_ph(), k, a)
+}
+
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h {
+ vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
+ simd_select_bitmask(k, _mm512_cvtepi16_ph(a), src)
+}
+
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h {
+ _mm512_mask_cvtepi16_ph(_mm512_setzero_ph(), k, a)
+}
+
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundepi16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vcvtw2ph_512(a.as_i16x32(), ROUNDING)
+}
+
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundepi16_ph<const ROUNDING: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512i,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(k, _mm512_cvt_roundepi16_ph::<ROUNDING>(a), src)
+}
+
+/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundepi16_ph<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512i,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ _mm512_mask_cvt_roundepi16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
+}
+
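(Editorial note, not part of the patch: a minimal sketch of how the `ROUNDING` const generic introduced above is used at a call site. The `_MM_FROUND_*` constants are the existing ones already exported by this module.)

    // Convert 32 signed words to f16, truncating and suppressing exceptions.
    // The rounding mode is a const generic, validated at compile time by
    // static_assert_rounding!.
    #[target_feature(enable = "avx512fp16")]
    unsafe fn words_to_ph_truncating(a: __m512i) -> __m512h {
        _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a)
    }
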
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtepu16_ph(a: __m128i) -> __m128h {
+ vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
+ simd_select_bitmask(k, _mm_cvtepu16_ph(a), src)
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h {
+ _mm_mask_cvtepu16_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h {
+ vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
+ simd_select_bitmask(k, _mm256_cvtepu16_ph(a), src)
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h {
+ _mm256_mask_cvtepu16_ph(_mm256_setzero_ph(), k, a)
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h {
+ vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
+ simd_select_bitmask(k, _mm512_cvtepu16_ph(a), src)
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h {
+ _mm512_mask_cvtepu16_ph(_mm512_setzero_ph(), k, a)
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundepu16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ vcvtuw2ph_512(a.as_u16x32(), ROUNDING)
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundepu16_ph<const ROUNDING: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512i,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ simd_select_bitmask(k, _mm512_cvt_roundepu16_ph::<ROUNDING>(a), src)
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu16_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundepu16_ph<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512i,
+) -> __m512h {
+ static_assert_rounding!(ROUNDING);
+ _mm512_mask_cvt_roundepu16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst. The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtepi32_ph(a: __m128i) -> __m128h {
+ _mm_mask_cvtepi32_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set). The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
+ vcvtdq2ph_128(a.as_i32x4(), src, k)
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h {
+ _mm_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
+}
+
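(Editorial note, not part of the patch: with only four source dwords, the 128-bit form produces four f16 results, so the upper 64 bits of the destination are zeroed; that is why `_mm_cvtepi32_ph` above is expressed as a masked call with mask `0xff`. A small illustrative sketch:)

    #[target_feature(enable = "avx512fp16,avx512vl")]
    unsafe fn demo_cvtepi32_ph() -> __m128h {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        // Result elements 0..=3 hold 1.0, 2.0, 3.0, 4.0 as f16;
        // elements 4..=7 are zero because only four dwords were converted.
        _mm_cvtepi32_ph(a)
    }
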
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h {
+ vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
+ simd_select_bitmask(k, _mm256_cvtepi32_ph(a), src)
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h {
+ _mm256_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h {
+ vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
+ simd_select_bitmask(k, _mm512_cvtepi32_ph(a), src)
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h {
+ _mm512_mask_cvtepi32_ph(_mm256_setzero_ph(), k, a)
+}
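
A minimal usage sketch (not part of the patch itself; it assumes a nightly toolchain with the `stdarch_x86_avx512_f16` feature enabled and an AVX512-FP16 target), showing the unmasked and zero-masked variants side by side:

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512fp16")]
    unsafe fn dword_to_half(a: __m512i) -> (__m256h, __m256h) {
        // Unmasked: all 16 i32 lanes are rounded to f16.
        let all = _mm512_cvtepi32_ph(a);
        // Zero-masked: only the even lanes are converted; odd lanes become 0.0.
        let evens = _mm512_maskz_cvtepi32_ph(0b0101_0101_0101_0101, a);
        (all, evens)
    }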
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
+ static_assert_rounding!(ROUNDING);
+ vcvtdq2ph_512(a.as_i32x16(), ROUNDING)
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>(
+ src: __m256h,
+ k: __mmask16,
+ a: __m512i,
+) -> __m256h {
+ static_assert_rounding!(ROUNDING);
+    simd_select_bitmask(k, _mm512_cvt_roundepi32_ph::<ROUNDING>(a), src)
+}
+
+/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(
+ k: __mmask16,
+ a: __m512i,
+) -> __m256h {
+ static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundepi32_ph::<ROUNDING>(_mm256_setzero_ph(), k, a)
+}
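
A sketch (not part of the patch) of supplying the `ROUNDING` const generic at a call site, reusing the `_MM_FROUND_*` constants this module already exports:

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512fp16")]
    unsafe fn dword_to_half_truncating(a: __m512i) -> __m256h {
        // Truncate toward zero and suppress exceptions, i.e. one of the
        // combinations listed in the doc comment above.
        _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a)
    }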
+
+/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsi2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h {
+ vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vcvtsi2sh(a, b, ROUNDING)
+}
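
A sketch (not part of the patch) of the scalar form: only lane 0 is produced from the integer argument, while the other seven f16 lanes pass through from `a`:

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512fp16")]
    unsafe fn set_low_lane(a: __m128h) -> __m128h {
        // dst[0] = 42 converted to f16; dst[1..8] copied from `a`.
        _mm_cvti32_sh(a, 42)
    }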
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst. The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtepu32_ph(a: __m128i) -> __m128h {
+ _mm_mask_cvtepu32_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set). The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
+ vcvtudq2ph_128(a.as_u32x4(), src, k)
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h {
+ _mm_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h {
+ vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
+ simd_select_bitmask(k, _mm256_cvtepu32_ph(a), src)
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h {
+ _mm256_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h {
+ vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
+ simd_select_bitmask(k, _mm512_cvtepu32_ph(a), src)
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h {
+ _mm512_mask_cvtepu32_ph(_mm256_setzero_ph(), k, a)
+}
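
For contrast with the zero-masking sketch above, a merge-masking sketch (not part of the patch), where lanes with a clear mask bit keep the corresponding lane of `src`:

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512fp16")]
    unsafe fn merge_convert(src: __m256h, a: __m512i) -> __m256h {
        // Low 8 lanes are converted from `a`; high 8 lanes are kept from `src`.
        _mm512_mask_cvtepu32_ph(src, 0x00ff, a)
    }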
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
+ static_assert_rounding!(ROUNDING);
+ vcvtudq2ph_512(a.as_u32x16(), ROUNDING)
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>(
+ src: __m256h,
+ k: __mmask16,
+ a: __m512i,
+) -> __m256h {
+ static_assert_rounding!(ROUNDING);
+    simd_select_bitmask(k, _mm512_cvt_roundepu32_ph::<ROUNDING>(a), src)
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(
+ k: __mmask16,
+ a: __m512i,
+) -> __m256h {
+ static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundepu32_ph::<ROUNDING>(_mm256_setzero_ph(), k, a)
+}
+
+/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtusi2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h {
+ vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vcvtusi2sh(a, b, ROUNDING)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst. The upper 96 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtepi64_ph(a: __m128i) -> __m128h {
+ _mm_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set). The upper 96 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
+ vcvtqq2ph_128(a.as_i64x2(), src, k)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// The upper 96 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h {
+ _mm_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
+}
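
A sketch (not part of the patch) highlighting the narrow result shape: two i64 inputs fill only the two low f16 lanes, so the remaining 96 bits of the `__m128h` are zero:

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512fp16,avx512vl")]
    unsafe fn qword_pair_to_half(a: __m128i) -> __m128h {
        // Lanes 0 and 1 hold the converted values; lanes 2..8 are zero.
        _mm_cvtepi64_ph(a)
    }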
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst. The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h {
+ _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set). The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
+ vcvtqq2ph_256(a.as_i64x4(), src, k)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h {
+ _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h {
+ vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
+ simd_select_bitmask(k, _mm512_cvtepi64_ph(a), src)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h {
+ _mm512_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vcvtqq2ph_512(a.as_i64x8(), ROUNDING)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m512i,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+    simd_select_bitmask(k, _mm512_cvt_roundepi64_ph::<ROUNDING>(a), src)
+}
+
+/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m512i,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundepi64_ph::<ROUNDING>(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst. The upper 96 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtepu64_ph(a: __m128i) -> __m128h {
+ _mm_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set). The upper 96 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
+ vcvtuqq2ph_128(a.as_u64x2(), src, k)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// The upper 96 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h {
+ _mm_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst. The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h {
+ _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set). The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
+ vcvtuqq2ph_256(a.as_u64x4(), src, k)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h {
+ _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h {
+ vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
+ simd_select_bitmask(k, _mm512_cvtepu64_ph(a), src)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h {
+ _mm512_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundepu64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vcvtuqq2ph_512(a.as_u64x8(), ROUNDING)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundepu64_ph<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m512i,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+    simd_select_bitmask(k, _mm512_cvt_roundepu64_ph::<ROUNDING>(a), src)
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu64_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m512i,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundepu64_ph::<ROUNDING>(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2phx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtxps_ph(a: __m128) -> __m128h {
+ _mm_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
+/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2phx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h {
+ vcvtps2phx_128(a, src, k)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2phx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h {
+ _mm_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
+}
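
A sketch (not part of the patch); the extra `x` in the name presumably distinguishes these AVX512-FP16 conversions (instruction `vcvtps2phx`) from the older F16C `_mm_cvtps_ph`, which returns an integer vector rather than `__m128h`:

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512fp16,avx512vl")]
    unsafe fn halve_precision(a: __m128) -> __m128h {
        // Four f32 lanes become the four low f16 lanes of the result.
        _mm_cvtxps_ph(a)
    }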
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2phx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtxps_ph(a: __m256) -> __m128h {
+ _mm256_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
+/// when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2phx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h {
+ vcvtps2phx_256(a, src, k)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2phx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h {
+ _mm256_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtps2phx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtxps_ph(a: __m512) -> __m256h {
+ _mm512_mask_cvtxps_ph(_mm256_setzero_ph(), 0xffff, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
+/// when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtps2phx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h {
+ vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtps2phx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h {
+ _mm512_mask_cvtxps_ph(_mm256_setzero_ph(), k, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtx_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256h {
+ static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvtx_roundps_ph::<ROUNDING>(_mm256_setzero_ph(), 0xffff, a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
+/// when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtx_roundps_ph<const ROUNDING: i32>(
+ src: __m256h,
+ k: __mmask16,
+ a: __m512,
+) -> __m256h {
+ static_assert_rounding!(ROUNDING);
+ vcvtps2phx_512(a, src, k, ROUNDING)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(
+ k: __mmask16,
+ a: __m512,
+) -> __m256h {
+ static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvtx_roundps_ph::<ROUNDING>(_mm256_setzero_ph(), k, a)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtss2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h {
+ _mm_mask_cvtss_sh(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtss2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h {
+ vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtss2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h {
+ _mm_mask_cvtss_sh(_mm_setzero_ph(), k, a, b)
+}
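
A sketch (not part of the patch) of the scalar single-to-half form: lane 0 comes from `b`, the remaining seven f16 lanes from `a`:

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512fp16")]
    unsafe fn merge_low_f32(a: __m128h, b: __m128) -> __m128h {
        // dst[0] = b[0] rounded to f16; dst[1..8] copied from `a`.
        _mm_cvtss_sh(a, b)
    }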
+
+/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundss_sh<const ROUNDING: i32>(a: __m128h, b: __m128) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+    _mm_mask_cvt_roundss_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvt_roundss_sh<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vcvtss2sh(a, b, src, k, ROUNDING)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvt_roundss_sh<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+    _mm_mask_cvt_roundss_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst. The upper 96 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtpd_ph(a: __m128d) -> __m128h {
+ _mm_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
+/// when the corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h {
+ vcvtpd2ph_128(a, src, k)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h {
+ _mm_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtpd_ph(a: __m256d) -> __m128h {
+ _mm256_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
+/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h {
+ vcvtpd2ph_256(a, src, k)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h {
+ _mm256_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtpd_ph(a: __m512d) -> __m128h {
+ _mm512_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
+/// when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h {
+ vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h {
+ _mm512_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
+}
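
As a quick illustration of the mask semantics described above (a sketch only, not part of this patch): the write-mask form falls back to `src` lanes, while the zero-mask form zeroes them. This assumes a nightly toolchain with the unstable `f16` type and a CPU supporting AVX512-FP16; the values are arbitrary.

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16")]
unsafe fn cvtpd_ph_mask_demo() -> (__m128h, __m128h) {
    let src = _mm_set1_ph(9.0); // fallback lanes for the write-mask form
    let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);

    // Write-mask: lanes with a clear mask bit keep the corresponding lane of `src`.
    let masked = _mm512_mask_cvtpd_ph(src, 0b0000_1111, a);
    // Zero-mask: lanes with a clear mask bit are zeroed instead.
    let zeroed = _mm512_maskz_cvtpd_ph(0b0000_1111, a);

    // masked = [1, 2, 3, 4, 9, 9, 9, 9], zeroed = [1, 2, 3, 4, 0, 0, 0, 0] (f16 lanes)
    (masked, zeroed)
}
```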
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(_mm_setzero_ph(), 0xff, a)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
+/// when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m512d,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vcvtpd2ph_512(a, src, k, ROUNDING)
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m512d,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(_mm_setzero_ph(), k, a)
+}
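
A minimal sketch (illustration only) of selecting an explicit rounding mode through the `ROUNDING` const generic used by the `*_cvt_round*` intrinsics above; it assumes the same nightly `f16`/AVX512-FP16 setup.

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16")]
unsafe fn narrow_truncating(a: __m512d) -> __m128h {
    // Truncate (round toward zero) and suppress exceptions while narrowing f64 -> f16.
    _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a)
}
```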
+
+/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsd2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h {
+ _mm_mask_cvtsd_sh(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsd2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
+ vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsd2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
+ _mm_mask_cvtsd_sh(_mm_setzero_ph(), k, a, b)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_cvt_roundsd_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128d,
+) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vcvtsd2sh(a, b, src, k, ROUNDING)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
+/// floating-point element, store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
+/// elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>(
+    k: __mmask8,
+    a: __m128h,
+    b: __m128d,
+) -> __m128h {
+    static_assert_rounding!(ROUNDING);
+    _mm_mask_cvt_roundsd_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
+}
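
To illustrate the scalar behavior described above (a sketch only, assuming nightly `f16` and AVX512-FP16 hardware): only the low f64 of `b` is converted, and the upper seven f16 lanes are passed through from `a`.

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16")]
unsafe fn cvtsd_sh_demo() -> __m128h {
    let a = _mm_setr_ph(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
    let b = _mm_set_sd(2.5);
    // Result lanes: [2.5, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0]
    _mm_cvtsd_sh(a, b)
}
```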
+
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
@@ -11281,1035 +13086,1669 @@ extern "C" {
#[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
- #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
- fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
- fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
- fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
- fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
+ fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
+ fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
+ fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
+ fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
+ fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
+ fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
+ fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
+ fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
+ fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
+ fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
+ fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
+ fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
+ fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
+ fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
+ fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
+ fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
+ fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
+ fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
+ fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
+ fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
+ fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
+ fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
+ fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
+ fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
+ fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
+ fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
+ fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
+ fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
+ fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
+ -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
+ fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
+ -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
+ fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
+ fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
+ fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.fma.f16"]
+ fn fmaf16(a: f16, b: f16, c: f16) -> f16; // TODO: use `crate::intrinsics::fmaf16` when it's available
+ #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
+ fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
+
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
+ fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
+ fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
+ fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
+ fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
+ fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
+ fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
+ fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
+ fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
+ fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
+ fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
+ fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
+ fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
+ fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
+ fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
+ fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
+ fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
+ fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
+ fn vminph_128(a: __m128h, b: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
+ fn vminph_256(a: __m256h, b: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
+ fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
+ fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128"]
+ fn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256"]
+ fn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512"]
+ fn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh"]
+ fn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128"]
+ fn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256"]
+ fn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512"]
+ fn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh"]
+ fn vgetmantsh(
+ a: __m128h,
+ b: __m128h,
+ imm8: i32,
+ src: __m128h,
+ k: __mmask8,
+ sae: i32,
+ ) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128"]
+ fn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256"]
+ fn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512"]
+ fn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh"]
+ fn vrndscalesh(
+ a: __m128h,
+ b: __m128h,
+ src: __m128h,
+ k: __mmask8,
+ imm8: i32,
+ sae: i32,
+ ) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128"]
+ fn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256"]
+ fn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512"]
+ fn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh"]
+ fn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128"]
+ fn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256"]
+ fn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512"]
+ fn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh"]
+ fn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
+ -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh"]
+ fn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8;
+
+ #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i16"]
+ fn vcvtw2ph_128(a: i16x8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i16"]
+ fn vcvtw2ph_256(a: i16x16, rounding: i32) -> __m256h;
+ #[link_name = "llvm.x86.avx512.sitofp.round.v32f16.v32i16"]
+ fn vcvtw2ph_512(a: i16x32, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u16"]
+ fn vcvtuw2ph_128(a: u16x8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16u16"]
+ fn vcvtuw2ph_256(a: u16x16, rounding: i32) -> __m256h;
+ #[link_name = "llvm.x86.avx512.uitofp.round.v32f16.v32u16"]
+ fn vcvtuw2ph_512(a: u16x32, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtdq2ph.128"]
+ fn vcvtdq2ph_128(a: i32x4, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i32"]
+ fn vcvtdq2ph_256(a: i32x8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i32"]
+ fn vcvtdq2ph_512(a: i32x16, rounding: i32) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.vcvtsi2sh"]
+ fn vcvtsi2sh(a: __m128h, b: i32, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtudq2ph.128"]
+ fn vcvtudq2ph_128(a: u32x4, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u32"]
+ fn vcvtudq2ph_256(a: u32x8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16u32"]
+ fn vcvtudq2ph_512(a: u32x16, rounding: i32) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.vcvtusi2sh"]
+ fn vcvtusi2sh(a: __m128h, b: u32, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.128"]
+ fn vcvtqq2ph_128(a: i64x2, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.256"]
+ fn vcvtqq2ph_256(a: i64x4, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i64"]
+ fn vcvtqq2ph_512(a: i64x8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.128"]
+ fn vcvtuqq2ph_128(a: u64x2, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.256"]
+ fn vcvtuqq2ph_256(a: u64x4, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u64"]
+ fn vcvtuqq2ph_512(a: u64x8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.128"]
+ fn vcvtps2phx_128(a: __m128, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.256"]
+ fn vcvtps2phx_256(a: __m256, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.512"]
+ fn vcvtps2phx_512(a: __m512, src: __m256h, k: __mmask16, rounding: i32) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtss2sh.round"]
+ fn vcvtss2sh(a: __m128h, b: __m128, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.128"]
+ fn vcvtpd2ph_128(a: __m128d, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.256"]
+ fn vcvtpd2ph_256(a: __m256d, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.512"]
+ fn vcvtpd2ph_512(a: __m512d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtsd2sh.round"]
+ fn vcvtsd2sh(a: __m128h, b: __m128d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::x86::*;
+ use crate::mem::transmute;
+ use crate::ptr::{addr_of, addr_of_mut};
+ use stdarch_test::simd_test;
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
+ _mm_setr_ph(re, im, re, im, re, im, re, im)
+ }
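
For context on the `*_set1_pch` test helpers (a sketch only, not part of the patch): each consecutive (re, im) pair of f16 lanes is one complex number, which is the layout the VFMULCPH/VFCMADDCPH family operates on. The wrapper name `_mm_mul_pch` below is Intel's packed-complex multiply and is assumed from earlier in this series; nightly `f16` and AVX512-FP16 hardware are also assumed.

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16")]
unsafe fn complex_mul_demo() -> __m128h {
    let a = _mm_setr_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0); // four copies of 1 + 2i
    let b = _mm_setr_ph(3.0, 4.0, 3.0, 4.0, 3.0, 4.0, 3.0, 4.0); // four copies of 3 + 4i
    // (1 + 2i) * (3 + 4i) = -5 + 10i, so every (re, im) pair becomes (-5.0, 10.0).
    _mm_mul_pch(a, b)
}
```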
- #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
- fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
- fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
- fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
- fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
+ _mm256_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ )
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
- fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
- fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
- fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
- fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
+ _mm512_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ re, im, re, im, re, im, re, im, re, im,
+ )
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
- fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
- fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
- fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
- fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_ph() {
+ let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set_ph() {
+ let r = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_setr_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set_ph() {
+ let r = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_setr_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_sh() {
+ let r = _mm_set_sh(1.0);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set1_ph() {
+ let r = _mm_set1_ph(1.0);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set1_ph() {
+ let r = _mm256_set1_ph(1.0);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set1_ph() {
+ let r = _mm512_set1_ph(1.0);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_setr_ph() {
+ let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_setr_ph() {
+ let r = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setr_ph() {
+ let r = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_setzero_ph() {
+ let r = _mm_setzero_ph();
+ let e = _mm_set1_ph(0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_setzero_ph() {
+ let r = _mm256_setzero_ph();
+ let e = _mm256_set1_ph(0.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setzero_ph() {
+ let r = _mm512_setzero_ph();
+ let e = _mm512_set1_ph(0.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castsi128_ph() {
+ let a = _mm_set1_epi16(0x3c00);
+ let r = _mm_castsi128_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castsi256_ph() {
+ let a = _mm256_set1_epi16(0x3c00);
+ let r = _mm256_castsi256_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castsi512_ph() {
+ let a = _mm512_set1_epi16(0x3c00);
+ let r = _mm512_castsi512_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_si128() {
+ let a = _mm_set1_ph(1.0);
+ let r = _mm_castph_si128(a);
+ let e = _mm_set1_epi16(0x3c00);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_si256() {
+ let a = _mm256_set1_ph(1.0);
+ let r = _mm256_castph_si256(a);
+ let e = _mm256_set1_epi16(0x3c00);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_si512() {
+ let a = _mm512_set1_ph(1.0);
+ let r = _mm512_castph_si512(a);
+ let e = _mm512_set1_epi16(0x3c00);
+ assert_eq_m512i(r, e);
+ }
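
The cast tests above rely on `0x3c00` being the IEEE-754 binary16 encoding of `1.0` (sign 0, biased exponent 0b01111, mantissa 0). A quick standalone check (sketch only, nightly `f16` assumed):

```rust
#![feature(f16)]

fn main() {
    // Reinterpret the f16 value 1.0 as its raw 16-bit pattern.
    let bits: u16 = unsafe { core::mem::transmute(1.0f16) };
    assert_eq!(bits, 0x3c00);
}
```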
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castps_ph() {
+ let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
+ let r = _mm_castps_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castps_ph() {
+ let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castps_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castps_ph() {
+ let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castps_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_ps() {
+ let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
+ let r = _mm_castph_ps(a);
+ let e = _mm_set1_ps(1.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_ps() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
+ let r = _mm256_castph_ps(a);
+ let e = _mm256_set1_ps(1.0);
+ assert_eq_m256(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
- fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
- fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
- fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
- fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
- fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
- fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
- fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
- fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_ps() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
+ let r = _mm512_castph_ps(a);
+ let e = _mm512_set1_ps(1.0);
+ assert_eq_m512(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
- fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
- fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
- fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
- fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
- fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
- -> __m512;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
- fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
- -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
- fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
- fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castpd_ph() {
+ let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
+ let r = _mm_castpd_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
- fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.fma.f16"]
- fn fmaf16(a: f16, b: f16, c: f16) -> f16; // TODO: use `crate::intrinsics::fmaf16` when it's available
- #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
- fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castpd_ph() {
+ let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castpd_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
- fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
- fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
- fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castpd_ph() {
+ let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castpd_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
- fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
- fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
- fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
- fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_pd() {
+ let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
+ let r = _mm_castph_pd(a);
+ let e = _mm_set1_pd(1.0);
+ assert_eq_m128d(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
- fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
- fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
- fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
- fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_pd() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
+ let r = _mm256_castph_pd(a);
+ let e = _mm256_set1_pd(1.0);
+ assert_eq_m256d(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
- fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
- fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_pd() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
+ let r = _mm512_castph_pd(a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
- fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
- fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
- fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
- fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph256_ph128() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm256_castph256_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
- fn vminph_128(a: __m128h, b: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
- fn vminph_256(a: __m256h, b: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
- fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
- fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph128() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128"]
- fn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256"]
- fn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512"]
- fn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh"]
- fn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph256() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128"]
- fn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256"]
- fn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512"]
- fn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh"]
- fn vgetmantsh(
- a: __m128h,
- b: __m128h,
- imm8: i32,
- src: __m128h,
- k: __mmask8,
- sae: i32,
- ) -> __m128h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_castph128_ph256(a);
+ assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128"]
- fn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256"]
- fn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512"]
- fn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh"]
- fn vrndscalesh(
- a: __m128h,
- b: __m128h,
- src: __m128h,
- k: __mmask8,
- imm8: i32,
- sae: i32,
- ) -> __m128h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_castph128_ph512(a);
+ assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128"]
- fn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256"]
- fn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512"]
- fn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh"]
- fn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_castph256_ph512(a);
+ assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128"]
- fn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256"]
- fn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512"]
- fn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh"]
- fn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
- -> __m128h;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_zextph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_zextph128_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m256h(r, e);
+ }
- #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh"]
- fn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8;
-}
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_zextph128_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
-#[cfg(test)]
-mod tests {
- use crate::core_arch::x86::*;
- use crate::mem::transmute;
- use crate::ptr::{addr_of, addr_of_mut};
- use stdarch_test::simd_test;
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_zextph256_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
- _mm_setr_ph(re, im, re, im, re, im, re, im)
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cmp_ph_mask() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
+ let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 0b11110000);
}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
- _mm256_setr_ph(
- re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
- )
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cmp_ph_mask() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
+ let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b);
+ assert_eq!(r, 0b01010000);
}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
- _mm512_setr_ph(
- re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
- re, im, re, im, re, im, re, im, re, im,
- )
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cmp_ph_mask() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
+ -16.0,
+ );
+ let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 0b1111000011110000);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_ph() {
- let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cmp_ph_mask() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
+ -16.0,
+ );
+ let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b0101010101010101, a, b);
+ assert_eq!(r, 0b0101000001010000);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set_ph() {
- let r = _mm256_set_ph(
+ unsafe fn test_mm512_cmp_ph_mask() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- let e = _mm256_setr_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ let b = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
+ -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
+ -29.0, -30.0, -31.0, -32.0,
);
- assert_eq_m256h(r, e);
+ let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 0b11110000111100001111000011110000);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set_ph() {
- let r = _mm512_set_ph(
+ unsafe fn test_mm512_mask_cmp_ph_mask() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
- let e = _mm512_setr_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
+ let b = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
+ -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
+ -29.0, -30.0, -31.0, -32.0,
);
- assert_eq_m512h(r, e);
+ let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101010101010101010101010101, a, b);
+ assert_eq!(r, 0b01010000010100000101000001010000);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_sh() {
- let r = _mm_set_sh(1.0);
- let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_cmp_round_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set1_ph() {
- let r = _mm_set1_ph(1.0);
- let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_mask_cmp_round_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
+ assert_eq!(r, 0);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set1_ph() {
- let r = _mm256_set1_ph(1.0);
- let e = _mm256_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set1_ph() {
- let r = _mm512_set1_ph(1.0);
- let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- );
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_mask_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
+ assert_eq!(r, 0);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_setr_ph() {
- let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_comi_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_setr_ph() {
- let r = _mm256_setr_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let e = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_comi_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_setr_ph() {
- let r = _mm512_setr_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let e = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_comieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comieq_sh(a, b);
+ assert_eq!(r, 1);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_setzero_ph() {
- let r = _mm_setzero_ph();
- let e = _mm_set1_ph(0.0);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comige_sh(a, b);
+ assert_eq!(r, 1);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_setzero_ph() {
- let r = _mm256_setzero_ph();
- let e = _mm256_set1_ph(0.0);
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comigt_sh(a, b);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_setzero_ph() {
- let r = _mm512_setzero_ph();
- let e = _mm512_set1_ph(0.0);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_comile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comile_sh(a, b);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castsi128_ph() {
- let a = _mm_set1_epi16(0x3c00);
- let r = _mm_castsi128_ph(a);
- let e = _mm_set1_ph(1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_comilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comilt_sh(a, b);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castsi256_ph() {
- let a = _mm256_set1_epi16(0x3c00);
- let r = _mm256_castsi256_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_comineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomige_sh(a, b);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castsi512_ph() {
- let a = _mm512_set1_epi16(0x3c00);
- let r = _mm512_castsi512_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_ucomigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomigt_sh(a, b);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_si128() {
- let a = _mm_set1_ph(1.0);
- let r = _mm_castph_si128(a);
- let e = _mm_set1_epi16(0x3c00);
- assert_eq_m128i(r, e);
+ unsafe fn test_mm_ucomile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomile_sh(a, b);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_si256() {
- let a = _mm256_set1_ph(1.0);
- let r = _mm256_castph_si256(a);
- let e = _mm256_set1_epi16(0x3c00);
- assert_eq_m256i(r, e);
+ unsafe fn test_mm_ucomilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomilt_sh(a, b);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_si512() {
- let a = _mm512_set1_ph(1.0);
- let r = _mm512_castph_si512(a);
- let e = _mm512_set1_epi16(0x3c00);
- assert_eq_m512i(r, e);
+ unsafe fn test_mm_ucomineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomineq_sh(a, b);
+ assert_eq!(r, 1);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castps_ph() {
- let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
- let r = _mm_castps_ph(a);
- let e = _mm_set1_ph(1.0);
- assert_eq_m128h(r, e);
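+ // `_mm_load_ph` expects a 16-byte-aligned pointer (a `__m128h` value is
+ // naturally aligned); the `loadu` tests further below cover the unaligned form.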
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_load_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_load_ph(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castps_ph() {
- let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castps_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_load_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_load_ph(addr_of!(a).cast());
+ assert_eq_m256h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castps_ph() {
- let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castps_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm512_load_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_load_ph(addr_of!(a).cast());
+ assert_eq_m512h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_ps() {
- let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
- let r = _mm_castph_ps(a);
- let e = _mm_set1_ps(1.0);
- assert_eq_m128(r, e);
+ unsafe fn test_mm_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_load_sh(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_ps() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
- let r = _mm256_castph_ps(a);
- let e = _mm256_set1_ps(1.0);
- assert_eq_m256(r, e);
+ unsafe fn test_mm_mask_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let src = _mm_set_sh(2.);
+ let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
+ assert_eq_m128h(src, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_ps() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
- let r = _mm512_castph_ps(a);
- let e = _mm512_set1_ps(1.0);
- assert_eq_m512(r, e);
+ unsafe fn test_mm_maskz_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
+ assert_eq_m128h(_mm_setzero_ph(), b);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castpd_ph() {
- let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
- let r = _mm_castpd_ph(a);
- let e = _mm_set1_ph(1.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_loadu_ph() {
+ let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
+ let r = _mm_loadu_ph(array.as_ptr());
+ let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castpd_ph() {
- let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castpd_ph(a);
- let e = _mm256_set1_ph(1.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ ];
+ let r = _mm256_loadu_ph(array.as_ptr());
+ let e = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castpd_ph() {
- let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castpd_ph(a);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ ];
+ let r = _mm512_loadu_ph(array.as_ptr());
+ let e = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_pd() {
- let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
- let r = _mm_castph_pd(a);
- let e = _mm_set1_pd(1.0);
- assert_eq_m128d(r, e);
+ unsafe fn test_mm_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_move_sh(a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_pd() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
- let r = _mm256_castph_pd(a);
- let e = _mm256_set1_pd(1.0);
- assert_eq_m256d(r, e);
+ unsafe fn test_mm_mask_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let src = _mm_set_sh(10.0);
+ let r = _mm_mask_move_sh(src, 0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_pd() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
- let r = _mm512_castph_pd(a);
- let e = _mm512_set1_pd(1.0);
- assert_eq_m512d(r, e);
+ unsafe fn test_mm_maskz_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_maskz_move_sh(0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph256_ph128() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- let r = _mm256_castph256_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_store_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph128() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_store_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let r = _mm512_castph512_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- assert_eq_m128h(r, e);
+ let mut b = _mm256_setzero_ph();
+ _mm256_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m256h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph256() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
- );
- let r = _mm512_castph512_ph256(a);
- let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ unsafe fn test_mm512_store_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- assert_eq_m256h(r, e);
+ let mut b = _mm512_setzero_ph();
+ _mm512_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m512h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_castph128_ph256(a);
- assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ unsafe fn test_mm_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_sh(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_castph128_ph512(a);
- assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ unsafe fn test_mm_mask_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_storeu_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut array = [0.0; 8];
+ _mm_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_storeu_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let r = _mm512_castph256_ph512(a);
- assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ let mut array = [0.0; 16];
+ _mm256_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_zextph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_zextph128_ph256(a);
- let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
+ unsafe fn test_mm512_storeu_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- assert_eq_m256h(r, e);
+ let mut array = [0.0; 32];
+ _mm512_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_zextph128_ph512(a);
- let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- );
- assert_eq_m512h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_add_ph(a, b);
+ let e = _mm_set1_ph(9.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- let r = _mm512_zextph256_ph512(a);
- let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- );
- assert_eq_m512h(r, e);
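+ // Reminder for the masked tests: `_mm_set_ph` lists lanes from highest to
+ // lowest, so mask bit 0 corresponds to the *last* argument.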
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_add_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_cmp_ph_mask() {
+ unsafe fn test_mm_maskz_add_ph() {
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
- let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 0b11110000);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_add_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_cmp_ph_mask() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
- let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b);
- assert_eq!(r, 0b01010000);
+ unsafe fn test_mm256_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_add_ph(a, b);
+ let e = _mm256_set1_ph(17.0);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_cmp_ph_mask() {
+ unsafe fn test_mm256_mask_add_ph() {
let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
let b = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
- -16.0,
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
);
- let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 0b1111000011110000);
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_cmp_ph_mask() {
+ unsafe fn test_mm256_maskz_add_ph() {
let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
let b = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
- -16.0,
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
);
- let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b0101010101010101, a, b);
- assert_eq!(r, 0b0101000001010000);
+ let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_cmp_ph_mask() {
+ unsafe fn test_mm512_add_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
let b = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
- -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
- -29.0, -30.0, -31.0, -32.0,
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
);
- let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 0b11110000111100001111000011110000);
+ let r = _mm512_add_ph(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_cmp_ph_mask() {
+ unsafe fn test_mm512_mask_add_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
let b = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
- -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
- -29.0, -30.0, -31.0, -32.0,
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
);
- let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101010101010101010101010101, a, b);
- assert_eq!(r, 0b01010000010100000101000001010000);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmp_round_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmp_round_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
- assert_eq!(r, 0);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmp_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmp_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
- assert_eq!(r, 0);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
- assert_eq!(r, 1);
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
+ assert_eq_m512h(r, e);
}
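+ // `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` selects round-to-nearest-even
+ // and suppresses floating-point exceptions (SAE) in the `*_round_*` intrinsics.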
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comieq_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comieq_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comige_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mask_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comigt_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comile_sh() {
+ unsafe fn test_mm_add_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comile_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comilt_sh() {
+ unsafe fn test_mm_mask_add_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comilt_sh(a, b);
- assert_eq!(r, 1);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comineq_sh() {
+ unsafe fn test_mm_maskz_add_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comineq_sh(a, b);
- assert_eq!(r, 1);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomieq_sh() {
+ unsafe fn test_mm_add_sh() {
let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomieq_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomige_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomigt_sh(a, b);
- assert_eq!(r, 1);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_sh(a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomile_sh() {
+ unsafe fn test_mm_mask_add_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomile_sh(a, b);
- assert_eq!(r, 1);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_sh(src, 1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomilt_sh() {
+ unsafe fn test_mm_maskz_add_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomilt_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_maskz_add_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_add_sh(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomineq_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_ucomineq_sh(a, b);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_sub_ph(a, b);
+ let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_load_ph() {
+ unsafe fn test_mm_mask_sub_ph() {
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_load_ph(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_load_ph() {
+ unsafe fn test_mm_maskz_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_sub_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_sub_ph() {
let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let b = _mm256_load_ph(addr_of!(a).cast());
- assert_eq_m256h(a, b);
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_sub_ph(a, b);
+ let e = _mm256_set_ph(
+ -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
+ 15.0,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_load_ph() {
- let a = _mm512_set_ph(
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_sub_ph() {
+ let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
);
- let b = _mm512_load_ph(addr_of!(a).cast());
- assert_eq_m512h(a, b);
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_load_sh(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_load_sh() {
- let a = _mm_set_sh(1.0);
- let src = _mm_set_sh(2.);
- let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
- assert_eq_m128h(src, b);
+ unsafe fn test_mm512_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_ph(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
- assert_eq_m128h(_mm_setzero_ph(), b);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_loadu_ph() {
- let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
- let r = _mm_loadu_ph(array.as_ptr());
- let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_loadu_ph() {
- let array = [
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- ];
- let r = _mm256_loadu_ph(array.as_ptr());
- let e = _mm256_setr_ph(
+ unsafe fn test_mm512_mask_sub_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
);
- assert_eq_m256h(r, e);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_loadu_ph() {
- let array = [
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- ];
- let r = _mm512_loadu_ph(array.as_ptr());
- let e = _mm512_setr_ph(
+ unsafe fn test_mm512_maskz_sub_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_move_sh(a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let src = _mm_set_sh(10.0);
- let r = _mm_mask_move_sh(src, 0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_maskz_move_sh(0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_store_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut b = _mm_setzero_ph();
- _mm_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_store_ph() {
- let a = _mm256_set_ph(
+ unsafe fn test_mm512_mask_sub_round_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- let mut b = _mm256_setzero_ph();
- _mm256_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m256h(a, b);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_store_ph() {
+ unsafe fn test_mm512_maskz_sub_round_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
- let mut b = _mm512_setzero_ph();
- _mm512_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m512h(a, b);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_store_sh() {
+ unsafe fn test_mm_sub_round_sh() {
let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_store_sh(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_store_sh() {
+ unsafe fn test_mm_mask_sub_round_sh() {
let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
- assert_eq_m128h(_mm_setzero_ph(), b);
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
- assert_eq_m128h(a, b);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_storeu_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut array = [0.0; 8];
- _mm_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_storeu_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let mut array = [0.0; 16];
- _mm256_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_sub_sh(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_storeu_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let mut array = [0.0; 32];
- _mm512_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+ unsafe fn test_mm_mask_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_sh(src, 1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_sub_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_sub_sh(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_add_ph() {
+ unsafe fn test_mm_mul_ph() {
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_add_ph(a, b);
- let e = _mm_set1_ph(9.0);
+ let r = _mm_mul_ph(a, b);
+ let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_add_ph() {
+ unsafe fn test_mm_mask_mul_ph() {
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_add_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_add_ph() {
+ unsafe fn test_mm_maskz_mul_ph() {
let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_add_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
+ let r = _mm_maskz_mul_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_add_ph() {
+ unsafe fn test_mm256_mul_ph() {
let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
let b = _mm256_set_ph(
16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
);
- let r = _mm256_add_ph(a, b);
- let e = _mm256_set1_ph(17.0);
+ let r = _mm256_mul_ph(a, b);
+ let e = _mm256_set_ph(
+ 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
+ 30.0, 16.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_add_ph() {
+ unsafe fn test_mm256_mask_mul_ph() {
let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
@@ -12319,30 +14758,30 @@ mod tests {
let src = _mm256_set_ph(
18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
);
- let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
+ let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_add_ph() {
+ unsafe fn test_mm256_maskz_mul_ph() {
let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
let b = _mm256_set_ph(
16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
);
- let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
+ let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_ph() {
+ unsafe fn test_mm512_mul_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -12353,13 +14792,17 @@ mod tests {
18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
3.0, 2.0, 1.0,
);
- let r = _mm512_add_ph(a, b);
- let e = _mm512_set1_ph(33.0);
+ let r = _mm512_mul_ph(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_ph() {
+ unsafe fn test_mm512_mask_mul_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -12374,16 +14817,16 @@ mod tests {
34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
);
- let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
+ let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_ph() {
+ unsafe fn test_mm512_maskz_mul_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -12394,16 +14837,16 @@ mod tests {
18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
3.0, 2.0, 1.0,
);
- let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
+ let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_round_ph() {
+ unsafe fn test_mm512_mul_round_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -12414,13 +14857,17 @@ mod tests {
18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
3.0, 2.0, 1.0,
);
- let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(33.0);
+ let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_round_ph() {
+ unsafe fn test_mm512_mask_mul_round_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -12435,21 +14882,21 @@ mod tests {
34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
);
- let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_round_ph() {
+ unsafe fn test_mm512_maskz_mul_round_ph() {
let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -12460,939 +14907,983 @@ mod tests {
18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
3.0, 2.0, 1.0,
);
- let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_mul_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_mul_sh(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_sh(src, 1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_mul_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_mul_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_mul_sh(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_div_ph(a, b);
+ let e = _mm_set1_ph(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
+ let r = _mm_mask_div_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_maskz_div_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_div_ph(a, b);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let src = _mm256_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0,
+ );
+ let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_ph(a, b);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let src = _mm512_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
+ );
+ let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let src = _mm512_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
+ );
+ let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_round_sh() {
+ unsafe fn test_mm_div_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_round_sh() {
+ unsafe fn test_mm_mask_div_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
let e = _mm_set_sh(4.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 1, a, b,
);
- let e = _mm_set_sh(3.0);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_round_sh() {
+ unsafe fn test_mm_maskz_div_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
let e = _mm_set_sh(0.0);
assert_eq_m128h(r, e);
let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(3.0);
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_sh() {
+ unsafe fn test_mm_div_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_add_sh(a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_div_sh(a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_sh() {
+ unsafe fn test_mm_mask_div_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_sh(src, 0, a, b);
+ let r = _mm_mask_div_sh(src, 0, a, b);
let e = _mm_set_sh(4.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_add_sh(src, 1, a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_mask_div_sh(src, 1, a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_sh() {
+ unsafe fn test_mm_maskz_div_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_maskz_add_sh(0, a, b);
+ let r = _mm_maskz_div_sh(0, a, b);
let e = _mm_set_sh(0.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_add_sh(1, a, b);
- let e = _mm_set_sh(3.0);
+ let r = _mm_maskz_div_sh(1, a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
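+    // In the `pch` intrinsics each pair of adjacent `f16` lanes forms one complex number
+    // (real, imaginary), and each mask bit covers one such pair. With a = b = 0 + 1i,
+    // every selected complex lane of the product is (0 + 1i)(0 + 1i) = -1 + 0i.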
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_sub_ph(a, b);
- let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ unsafe fn test_mm_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_mul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ unsafe fn test_mm_mask_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_mul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_sub_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ unsafe fn test_mm_maskz_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_mul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_sub_ph(a, b);
- let e = _mm256_set_ph(
- -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
- 15.0,
- );
+ unsafe fn test_mm256_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_mul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ unsafe fn test_mm256_mask_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ unsafe fn test_mm256_maskz_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_mul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_sub_ph(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
- );
+ unsafe fn test_mm512_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
+ unsafe fn test_mm512_mask_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
+ unsafe fn test_mm512_maskz_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
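+    // The scalar `sch` intrinsics operate only on the lowest complex pair; the upper six
+    // `f16` elements of the result are copied from `a`.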
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(-1.0);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_maskz_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(-1.0);
+ _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_sh(a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_mul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_sub_sh(src, 1, a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_mask_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_mul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_sub_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_sub_sh(1, a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_maskz_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_mul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
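+    // The `fmul` tests mirror the `mul` tests above: the same inputs yield the same
+    // complex products.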
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_mul_ph(a, b);
- let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ unsafe fn test_mm_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_fmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ unsafe fn test_mm_mask_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_mul_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ unsafe fn test_mm_maskz_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_fmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_mul_ph(a, b);
- let e = _mm256_set_ph(
- 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
- 30.0, 16.0,
- );
+ unsafe fn test_mm256_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_fmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ unsafe fn test_mm256_mask_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ unsafe fn test_mm256_maskz_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_mul_ph(a, b);
- let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
- );
- let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
- );
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
+ unsafe fn test_mm512_mask_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ unsafe fn test_mm512_maskz_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(2.0);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_maskz_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(2.0);
+ _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_sh(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_mul_sh(src, 1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_mask_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_mul_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_mul_sh(1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_maskz_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_fmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
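+    // The `cmul` intrinsics multiply `a` by the complex conjugate of `b`, so with
+    // a = 0 + 1i and b = 0 - 1i the product is (0 + 1i)(0 + 1i) = -1 + 0i.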
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let r = _mm_div_ph(a, b);
- let e = _mm_set1_ph(0.5);
+ unsafe fn test_mm_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_cmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
- let r = _mm_mask_div_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
+ unsafe fn test_mm_mask_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let r = _mm_maskz_div_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ unsafe fn test_mm_maskz_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_cmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let r = _mm256_div_ph(a, b);
- let e = _mm256_set1_ph(0.5);
+ unsafe fn test_mm256_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_cmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let src = _mm256_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0,
+ unsafe fn test_mm256_mask_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ unsafe fn test_mm256_maskz_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_ph(a, b);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
+ unsafe fn test_mm512_mask_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ unsafe fn test_mm512_maskz_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
+ unsafe fn test_mm512_mask_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_mask_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_maskz_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_cmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_sh(a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_div_sh(src, 1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_mask_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_div_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_div_sh(1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_maskz_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
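+    // The `fcmul` tests mirror the `cmul` tests above, multiplying `a` by the complex
+    // conjugate of `b`.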
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mul_pch() {
+ unsafe fn test_mm_fcmul_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_mul_pch(a, b);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_fcmul_pch(a, b);
let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_mul_pch() {
+ unsafe fn test_mm_mask_fcmul_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_mul_pch(src, 0b0101, a, b);
+ let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_mul_pch() {
+ unsafe fn test_mm_maskz_fcmul_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_mul_pch(0b0101, a, b);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_fcmul_pch(0b0101, a, b);
let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mul_pch() {
+ unsafe fn test_mm256_fcmul_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_mul_pch(a, b);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_fcmul_pch(a, b);
let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_mul_pch() {
+ unsafe fn test_mm256_mask_fcmul_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
let src = _mm256_setr_ph(
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
+ let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
let e = _mm256_setr_ph(
-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
@@ -13400,10 +15891,10 @@ mod tests {
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_mul_pch() {
+ unsafe fn test_mm256_maskz_fcmul_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_mul_pch(0b01010101, a, b);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
let e = _mm256_setr_ph(
-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
@@ -13411,24 +15902,24 @@ mod tests {
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_pch() {
+ unsafe fn test_mm512_fcmul_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_mul_pch(a, b);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_pch(a, b);
let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_pch() {
+ unsafe fn test_mm512_mask_fcmul_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
let src = _mm512_setr_ph(
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
32.0, 33.0,
);
- let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
+ let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
let e = _mm512_setr_ph(
-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
-1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
@@ -13438,10 +15929,10 @@ mod tests {
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_pch() {
+ unsafe fn test_mm512_maskz_fcmul_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
let e = _mm512_setr_ph(
-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
@@ -13450,24 +15941,24 @@ mod tests {
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ unsafe fn test_mm512_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_round_pch() {
+ unsafe fn test_mm512_mask_fcmul_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
let src = _mm512_setr_ph(
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
32.0, 33.0,
);
- let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
0b0101010101010101,
a,
@@ -13482,10 +15973,10 @@ mod tests {
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_round_pch() {
+ unsafe fn test_mm512_maskz_fcmul_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b0101010101010101,
a,
b,
@@ -13498,5386 +15989,5955 @@ mod tests {
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_round_sch() {
+ unsafe fn test_mm_fcmul_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_sch(a, b);
let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_round_sch() {
+ unsafe fn test_mm_mask_fcmul_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
+ let r = _mm_mask_fcmul_sch(src, 0, a, b);
let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_round_sch() {
+ unsafe fn test_mm_maskz_fcmul_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r =
- _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_fcmul_sch(0, a, b);
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_sch() {
+ unsafe fn test_mm_fcmul_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_mul_sch(a, b);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_sch() {
+ unsafe fn test_mm_mask_fcmul_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_mul_sch(src, 0, a, b);
+ let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_sch() {
+ unsafe fn test_mm_maskz_fcmul_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_maskz_mul_sch(0, a, b);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmul_pch() {
+ unsafe fn test_mm_abs_ph() {
+ let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
+ let r = _mm_abs_ph(a);
+ let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_abs_ph() {
+ let a = _mm256_set_ph(
+ -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
+ -14.0,
+ );
+ let r = _mm256_abs_ph(a);
+ let e = _mm256_set_ph(
+ 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_abs_ph() {
+ let a = _mm512_set_ph(
+ -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
+ -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
+ 27.0, -28.0, 29.0, -30.0,
+ );
+ let r = _mm512_abs_ph(a);
+ let e = _mm512_set_ph(
+ 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
+ 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
+ 29.0, 30.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_conj_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_fmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ let r = _mm_conj_pch(a);
+ let e = _mm_set1_pch(0.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmul_pch() {
+ unsafe fn test_mm_mask_conj_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ let r = _mm_mask_conj_pch(src, 0b0101, a);
+ let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmul_pch() {
+ unsafe fn test_mm_maskz_conj_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_fmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ let r = _mm_maskz_conj_pch(0b0101, a);
+ let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmul_pch() {
+ unsafe fn test_mm256_conj_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_fmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ let r = _mm256_conj_pch(a);
+ let e = _mm256_set1_pch(0.0, -1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmul_pch() {
+ unsafe fn test_mm256_mask_conj_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
let src = _mm256_setr_ph(
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
+ let r = _mm256_mask_conj_pch(src, 0b01010101, a);
let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmul_pch() {
+ unsafe fn test_mm256_maskz_conj_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
+ let r = _mm256_maskz_conj_pch(0b01010101, a);
+ let e = _mm256_setr_ph(
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_conj_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_conj_pch(a);
+ let e = _mm512_set1_pch(0.0, -1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_conj_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
+ );
+ let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
+ let e = _mm512_setr_ph(
+ 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
+ 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
+ 33.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_conj_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
+ let e = _mm512_setr_ph(
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_fmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_fmadd_pch(a, b, c);
+ let e = _mm_set1_pch(-2.0, 3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_fmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask3_fmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_fmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_fmadd_pch(a, b, c);
+ let e = _mm256_set1_pch(-2.0, 3.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_fmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
+ let e = _mm256_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
+ let e = _mm256_setr_ph(
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_fmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmul_pch() {
+ unsafe fn test_mm512_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_fmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_fmadd_pch(a, b, c);
+ let e = _mm512_set1_pch(-2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmul_pch() {
+ unsafe fn test_mm512_mask_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
);
- let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmul_pch() {
+ unsafe fn test_mm512_maskz_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmul_round_pch() {
+ unsafe fn test_mm512_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r =
+ _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pch(-2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmul_round_pch() {
+ unsafe fn test_mm512_mask_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
0b0101010101010101,
+ b,
+ c,
+ );
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b0101010101010101,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmul_round_pch() {
+ unsafe fn test_mm512_maskz_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b0101010101010101,
a,
b,
+ c,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmul_round_sch() {
+ unsafe fn test_mm_fmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fmadd_sch(a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmul_round_sch() {
+ unsafe fn test_mm_mask_fmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fmadd_sch(a, 0, b, c);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_sch(a, 1, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmul_round_sch() {
+ unsafe fn test_mm_mask3_fmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r =
- _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmadd_sch(a, b, c, 0);
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_sch(a, b, c, 1);
+ let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fmadd_sch(0, a, b, c);
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_sch(1, a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmul_sch() {
+ unsafe fn test_mm_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_fmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmul_sch() {
+ unsafe fn test_mm_mask_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmul_sch() {
+ unsafe fn test_mm_mask3_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_maskz_fmul_sch(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_cmul_pch() {
+ unsafe fn test_mm_fcmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_cmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_fcmadd_pch(a, b, c);
+ let e = _mm_set1_pch(2.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_cmul_pch() {
+ unsafe fn test_mm_mask_fcmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_cmul_pch() {
+ unsafe fn test_mm_mask3_fcmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_maskz_cmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_fcmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_cmul_pch() {
+ unsafe fn test_mm256_fcmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_fcmadd_pch(a, b, c);
+ let e = _mm256_set1_pch(2.0, 3.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_cmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
+ let e = _mm256_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_cmul_pch() {
+ unsafe fn test_mm256_mask3_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- );
- let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_cmul_pch() {
+ unsafe fn test_mm256_maskz_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_cmul_pch() {
+ unsafe fn test_mm512_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_cmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_fcmadd_pch(a, b, c);
+ let e = _mm512_set1_pch(2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_cmul_pch() {
+ unsafe fn test_mm512_mask_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
+ 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
);
- let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fcmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
+ 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_cmul_pch() {
+ unsafe fn test_mm512_maskz_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
+ 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_cmul_round_pch() {
+ unsafe fn test_mm512_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r =
+ _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pch(2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_cmul_round_pch() {
+ unsafe fn test_mm512_mask_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
0b0101010101010101,
+ b,
+ c,
+ );
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
+ 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fcmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b0101010101010101,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
+ 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_cmul_round_pch() {
+ unsafe fn test_mm512_maskz_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b0101010101010101,
a,
b,
+ c,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
+ 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmul_sch() {
+ unsafe fn test_mm_fcmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_cmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fcmadd_sch(a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmul_sch() {
+ unsafe fn test_mm_mask_fcmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_cmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fcmadd_sch(a, 0, b, c);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fcmadd_sch(a, 1, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_cmul_sch() {
+ unsafe fn test_mm_mask3_fcmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_maskz_cmul_sch(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
+ let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fcmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fcmadd_sch(0, a, b, c);
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fcmadd_sch(1, a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmul_round_sch() {
+ unsafe fn test_mm_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmul_round_sch() {
+ unsafe fn test_mm_mask_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_cmul_round_sch() {
+ unsafe fn test_mm_mask3_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r =
- _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fcmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fmadd_ph(a, b, c);
+ let e = _mm_set1_ph(5.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_fcmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_mask_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ unsafe fn test_mm_mask3_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_maskz_fcmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ unsafe fn test_mm_maskz_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_fcmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm256_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmadd_ph(a, b, c);
+ let e = _mm256_set1_ph(5.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ unsafe fn test_mm256_mask_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
- let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_fcmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmadd_ph(a, b, c);
+ let e = _mm512_set1_ph(5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ unsafe fn test_mm512_mask_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
+ 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm512_mask3_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
+ 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_maskz_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
+ 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b0101010101010101,
- a,
- b,
- );
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
- );
+ unsafe fn test_mm512_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_mask_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
+ 0b01010101010101010101010101010101,
b,
+ c,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_fcmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fcmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_maskz_fcmul_sch(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r =
- _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_abs_ph() {
- let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
- let r = _mm_abs_ph(a);
- let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
- assert_eq_m128h(r, e);
+ let e = _mm512_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
+ 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ );
+ assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_abs_ph() {
- let a = _mm256_set_ph(
- -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
- -14.0,
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
+ b,
+ c,
+ 0b01010101010101010101010101010101,
);
- let r = _mm256_abs_ph(a);
- let e = _mm256_set_ph(
- 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
+ let e = _mm512_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
+ 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
- assert_eq_m256h(r, e);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_abs_ph() {
- let a = _mm512_set_ph(
- -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
- -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
- 27.0, -28.0, 29.0, -30.0,
+ unsafe fn test_mm512_maskz_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ c,
);
- let r = _mm512_abs_ph(a);
let e = _mm512_set_ph(
- 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
- 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
- 29.0, 30.0,
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
+ 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let r = _mm_conj_pch(a);
- let e = _mm_set1_pch(0.0, -1.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmadd_sh(a, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_conj_pch(src, 0b0101, a);
- let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmadd_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_conj_pch(0b0101, a);
- let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
+ let r = _mm_mask_fmadd_sh(a, 1, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_conj_pch(a);
- let e = _mm256_set1_pch(0.0, -1.0);
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask3_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmadd_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_sh(a, b, c, 1);
+ let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- );
- let r = _mm256_mask_conj_pch(src, 0b01010101, a);
- let e = _mm256_setr_ph(
- 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
- );
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmadd_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_sh(1, a, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_conj_pch(0b01010101, a);
- let e = _mm256_setr_ph(
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
- );
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_conj_pch(a);
- let e = _mm512_set1_pch(0.0, -1.0);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_mask_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ unsafe fn test_mm_mask3_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
);
- let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
- let e = _mm512_setr_ph(
- 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
- 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
- 33.0,
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
- let e = _mm512_setr_ph(
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ unsafe fn test_mm_maskz_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_fmadd_pch(a, b, c);
- let e = _mm_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
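+        // fmsub computes (a * b) - c in each lane: (1 * 2) - 3 = -1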
+ let r = _mm_fmsub_ph(a, b, c);
+ let e = _mm_set1_ph(-1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
+ unsafe fn test_mm_mask_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
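+        // mask variant: lanes whose mask bit is 1 take the fmsub result, lanes with a 0 bit keep the corresponding lane of a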
+ let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
+ unsafe fn test_mm_mask3_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
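+        // mask3 variant: lanes whose mask bit is 0 keep the corresponding lane of c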
+ let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
+ unsafe fn test_mm_maskz_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
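+        // maskz variant: lanes whose mask bit is 0 are zeroed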
+ let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_fmadd_pch(a, b, c);
- let e = _mm256_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm256_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmsub_ph(a, b, c);
+ let e = _mm256_set1_ph(-1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm256_mask_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm256_mask3_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_fmadd_pch(a, b, c);
- let e = _mm512_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm512_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmsub_ph(a, b, c);
+ let e = _mm512_set1_ph(-1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm512_mask_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm512_mask3_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm512_maskz_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r =
- _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm512_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
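+        // _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest with exceptions suppressed, which matches the default rounding, so the expected values equal the non-`round` variant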
+ let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(-1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
b,
c,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ let e = _mm512_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask3_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ let e = _mm512_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_maskz_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
c,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fmadd_sch(a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
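+        // the _sh form operates on lane 0 only ((1 * 2) - 3 = -1); lanes 1..=7 are copied from a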
+ let r = _mm_fmsub_sh(a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fmadd_sch(a, 0, b, c);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmsub_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_sch(a, 1, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_mask_fmsub_sh(a, 1, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fmadd_sch(a, b, c, 0);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ unsafe fn test_mm_mask3_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmsub_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_sch(a, b, c, 1);
- let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmsub_sh(a, b, c, 1);
+ let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fmadd_sch(0, a, b, c);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_maskz_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmsub_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_sch(1, a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_maskz_fmsub_sh(1, a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask3_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_maskz_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_fcmadd_pch(a, b, c);
- let e = _mm_set1_pch(2.0, 3.0);
+ unsafe fn test_mm_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
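+        // fnmadd computes -(a * b) + c in each lane: -(1 * 2) + 3 = 1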
+ let r = _mm_fnmadd_ph(a, b, c);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
+ unsafe fn test_mm_mask_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
+ unsafe fn test_mm_mask3_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
+ unsafe fn test_mm_maskz_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_fcmadd_pch(a, b, c);
- let e = _mm256_set1_pch(2.0, 3.0);
+ unsafe fn test_mm256_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fnmadd_ph(a, b, c);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm256_mask_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm256_mask3_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_fcmadd_pch(a, b, c);
- let e = _mm512_set1_pch(2.0, 3.0);
+ unsafe fn test_mm512_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fnmadd_ph(a, b, c);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
- 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm512_mask_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
- 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm512_mask3_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
- 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm512_maskz_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
+ unsafe fn test_mm512_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
let r =
- _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_pch(2.0, 3.0);
+ _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
b,
c,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
- 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask3_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
- 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ let e = _mm512_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
c,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
- 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fcmadd_sch(a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmadd_sh(a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fcmadd_sch(a, 0, b, c);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
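+        // -(1 * 2) + 3 = 1, which happens to equal lane 0 of a, so the masked and unmasked cases expect the same vector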
+ let r = _mm_mask_fnmadd_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fcmadd_sch(a, 1, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_mask_fnmadd_sh(a, 1, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ unsafe fn test_mm_mask3_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
- let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
+ let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fcmadd_sch(0, a, b, c);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_maskz_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmadd_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fcmadd_sch(1, a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_maskz_fnmadd_sh(1, a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask3_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_maskz_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmadd_ph() {
+ unsafe fn test_mm_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
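+        // fnmsub computes -(a * b) - c in each lane: -(1 * 2) - 3 = -5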
- let r = _mm_fmadd_ph(a, b, c);
- let e = _mm_set1_ph(5.0);
+ let r = _mm_fnmsub_ph(a, b, c);
+ let e = _mm_set1_ph(-5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmadd_ph() {
+ unsafe fn test_mm_mask_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
+ let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmadd_ph() {
+ unsafe fn test_mm_mask3_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
+ let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmadd_ph() {
+ unsafe fn test_mm_maskz_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
+ let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmadd_ph() {
+ unsafe fn test_mm256_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmadd_ph(a, b, c);
- let e = _mm256_set1_ph(5.0);
+ let r = _mm256_fnmsub_ph(a, b, c);
+ let e = _mm256_set1_ph(-5.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmadd_ph() {
+ unsafe fn test_mm256_mask_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
let e = _mm256_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmadd_ph() {
+ unsafe fn test_mm256_mask3_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
let e = _mm256_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmadd_ph() {
+ unsafe fn test_mm256_maskz_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
let e = _mm256_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_ph() {
+ unsafe fn test_mm512_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmadd_ph(a, b, c);
- let e = _mm512_set1_ph(5.0);
+ let r = _mm512_fnmsub_ph(a, b, c);
+ let e = _mm512_set1_ph(-5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_ph() {
+ unsafe fn test_mm512_mask_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
let e = _mm512_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
- 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_ph() {
+ unsafe fn test_mm512_mask3_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
let e = _mm512_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
- 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_ph() {
+ unsafe fn test_mm512_maskz_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
let e = _mm512_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
- 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_round_ph() {
+ unsafe fn test_mm512_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(5.0);
+ let r =
+ _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(-5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_round_ph() {
+ unsafe fn test_mm512_mask_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
0b01010101010101010101010101010101,
b,
c,
);
let e = _mm512_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
- 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_round_ph() {
+ unsafe fn test_mm512_mask3_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
0b01010101010101010101010101010101,
);
let e = _mm512_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
- 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_round_ph() {
+ unsafe fn test_mm512_maskz_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
c,
);
let e = _mm512_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
- 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_sh() {
+ unsafe fn test_mm_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmadd_sh(a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_fnmsub_sh(a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_sh() {
+ unsafe fn test_mm_mask_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmadd_sh(a, 0, b, c);
+ let r = _mm_mask_fnmsub_sh(a, 0, b, c);
let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_sh(a, 1, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_mask_fnmsub_sh(a, 1, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_sh() {
+ unsafe fn test_mm_mask3_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmadd_sh(a, b, c, 0);
+ let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_sh(a, b, c, 1);
- let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
+ let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_sh() {
+ unsafe fn test_mm_maskz_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmadd_sh(0, a, b, c);
+ let r = _mm_maskz_fnmsub_sh(0, a, b, c);
let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_sh(1, a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_maskz_fnmsub_sh(1, a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_round_sh() {
+ unsafe fn test_mm_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_round_sh() {
+ unsafe fn test_mm_mask_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_round_sh() {
+ unsafe fn test_mm_mask3_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_round_sh() {
+ unsafe fn test_mm_maskz_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmsub_ph() {
+ unsafe fn test_mm_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
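+        // fmaddsub alternates per lane: even lanes compute (a * b) - c = -1, odd lanes (a * b) + c = 5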
- let r = _mm_fmsub_ph(a, b, c);
- let e = _mm_set1_ph(-1.0);
+ let r = _mm_fmaddsub_ph(a, b, c);
+ let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmsub_ph() {
+ unsafe fn test_mm_mask_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
+ let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
+ let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmsub_ph() {
+ unsafe fn test_mm_mask3_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
+ let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
+ let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmsub_ph() {
+ unsafe fn test_mm_maskz_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
+ let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
+ let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmsub_ph() {
+ unsafe fn test_mm256_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmsub_ph(a, b, c);
- let e = _mm256_set1_ph(-1.0);
+ let r = _mm256_fmaddsub_ph(a, b, c);
+ let e = _mm256_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmsub_ph() {
+ unsafe fn test_mm256_mask_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
let e = _mm256_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmsub_ph() {
+ unsafe fn test_mm256_mask3_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
let e = _mm256_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmsub_ph() {
+ unsafe fn test_mm256_maskz_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
let e = _mm256_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsub_ph(a, b, c);
- let e = _mm512_set1_ph(-1.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsub_ph() {
+ unsafe fn test_mm512_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_fmaddsub_ph(a, b, c);
let e = _mm512_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsub_ph() {
+ unsafe fn test_mm512_mask_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
let e = _mm512_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsub_ph() {
+ unsafe fn test_mm512_mask3_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
let e = _mm512_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(-1.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsub_round_ph() {
+ unsafe fn test_mm512_maskz_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a,
- 0b01010101010101010101010101010101,
- b,
- c,
- );
+ let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
let e = _mm512_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
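+ // The _round_ph variants take an explicit rounding/SAE control as a const
+ // generic; _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC matches the default
+ // rounding mode, so the expected values are the same as in the non-_round
+ // tests above.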
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsub_round_ph() {
+ unsafe fn test_mm512_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a,
- b,
- c,
- 0b01010101010101010101010101010101,
- );
+ let r =
+ _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
let e = _mm512_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsub_round_ph() {
+ unsafe fn test_mm512_mask_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- b,
- c,
- );
- let e = _mm512_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmsub_sh(a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmsub_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fmsub_sh(a, 1, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmsub_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fmsub_sh(a, b, c, 1);
- let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmsub_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fmsub_sh(1, a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
+ 0b00110011001100110011001100110011,
+ b,
+ c,
);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
+ unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
+ b,
+ c,
+ 0b00110011001100110011001100110011,
);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
+ let e = _mm512_set_ph(
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
- let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
+ unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00110011001100110011001100110011,
+ a,
+ b,
+ c,
);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
+ let e = _mm512_set_ph(
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ assert_eq_m512h(r, e);
}
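+ // fmsubadd mirrors fmaddsub: even-indexed lanes compute a * b + c and
+ // odd-indexed lanes compute a * b - c, so the same inputs now give lanes
+ // alternating 5.0 / -1.0.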
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fnmadd_ph() {
+ unsafe fn test_mm_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_fnmadd_ph(a, b, c);
- let e = _mm_set1_ph(1.0);
+ let r = _mm_fmsubadd_ph(a, b, c);
+ let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fnmadd_ph() {
+ unsafe fn test_mm_mask_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
+ let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fnmadd_ph() {
+ unsafe fn test_mm_mask3_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
+ let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
+ let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fnmadd_ph() {
+ unsafe fn test_mm_maskz_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
+ let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fnmadd_ph() {
+ unsafe fn test_mm256_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fnmadd_ph(a, b, c);
- let e = _mm256_set1_ph(1.0);
+ let r = _mm256_fmsubadd_ph(a, b, c);
+ let e = _mm256_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fnmadd_ph() {
+ unsafe fn test_mm256_mask_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
let e = _mm256_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fnmadd_ph() {
+ unsafe fn test_mm256_mask3_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
let e = _mm256_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fnmadd_ph() {
+ unsafe fn test_mm256_maskz_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
let e = _mm256_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmadd_ph() {
+ unsafe fn test_mm512_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fnmadd_ph(a, b, c);
- let e = _mm512_set1_ph(1.0);
+ let r = _mm512_fmsubadd_ph(a, b, c);
+ let e = _mm512_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmadd_ph() {
+ unsafe fn test_mm512_mask_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmadd_ph() {
+ unsafe fn test_mm512_mask3_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmadd_ph() {
+ unsafe fn test_mm512_maskz_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmadd_round_ph() {
+ unsafe fn test_mm512_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
let r =
- _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(1.0);
+ _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmadd_round_ph() {
+ unsafe fn test_mm512_mask_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
b,
c,
);
let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmadd_round_ph() {
+ unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
+ );
+ let e = _mm512_set_ph(
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00110011001100110011001100110011,
+ a,
+ b,
+ c,
+ );
+ let e = _mm512_set_ph(
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
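+ // rcp is an approximate per-lane reciprocal. The inputs are powers of two
+ // (1 / 2.0 = 0.5), which the approximation should reproduce exactly, so a
+ // bit-exact comparison via assert_eq_m128h and friends is safe here.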
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_rcp_ph() {
+ let a = _mm_set1_ph(2.0);
+ let r = _mm_rcp_ph(a);
+ let e = _mm_set1_ph(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_rcp_ph() {
+ let a = _mm_set1_ph(2.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_rcp_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_rcp_ph() {
+ let a = _mm_set1_ph(2.0);
+ let r = _mm_maskz_rcp_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_rcp_ph(a);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_rcp_ph(a);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
+ 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
- a,
- b,
- c,
- );
+ unsafe fn test_mm512_maskz_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
assert_eq_m512h(r, e);
}
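+ // The scalar _sh variants only operate on lane 0: the result for b's lowest
+ // element lands in lane 0 and the upper seven lanes are copied from a, which
+ // is why 10.0..=16.0 reappear unchanged in the expected vectors.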
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmadd_sh(a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_rcp_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_rcp_sh(a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmadd_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_mask_rcp_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_rcp_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_fnmadd_sh(a, 1, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_mask_rcp_sh(src, 1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ unsafe fn test_mm_maskz_rcp_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_rcp_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
- let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_rcp_sh(1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
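+ // rsqrt is the approximate reciprocal square root; 1 / sqrt(4.0) = 0.5 is
+ // exactly representable, so the bit-exact comparisons should hold here too.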
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmadd_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_rsqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_rsqrt_ph(a);
+ let e = _mm_set1_ph(0.5);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmadd_sh(1, a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_rsqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_rsqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_maskz_rsqrt_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_rsqrt_ph(a);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
- );
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
- );
- let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_rsqrt_ph(a);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
+ unsafe fn test_mm512_mask_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
+ 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_rsqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_rsqrt_sh(a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fnmsub_ph(a, b, c);
- let e = _mm_set1_ph(-5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_rsqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_rsqrt_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_rsqrt_sh(src, 1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_rsqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_maskz_rsqrt_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_rsqrt_sh(1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
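+ // sqrt, unlike rcp and rsqrt, is correctly rounded, so sqrt(4.0) == 2.0
+ // exactly and no approximation caveat applies.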
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
+ unsafe fn test_mm_sqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_sqrt_ph(a);
+ let e = _mm_set1_ph(2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
+ unsafe fn test_mm_mask_sqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fnmsub_ph(a, b, c);
- let e = _mm256_set1_ph(-5.0);
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_sqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_maskz_sqrt_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
- let e = _mm256_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- );
+ unsafe fn test_mm256_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_sqrt_ph(a);
+ let e = _mm256_set1_ph(2.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
+ unsafe fn test_mm256_mask_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
+ unsafe fn test_mm256_maskz_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fnmsub_ph(a, b, c);
- let e = _mm512_set1_ph(-5.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
- let e = _mm512_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- );
+ unsafe fn test_mm512_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_sqrt_ph(a);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ unsafe fn test_mm512_mask_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ unsafe fn test_mm512_maskz_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(-5.0);
+ unsafe fn test_mm512_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a,
+ unsafe fn test_mm512_mask_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
0b01010101010101010101010101010101,
- b,
- c,
- );
- let e = _mm512_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- b,
- c,
- 0b01010101010101010101010101010101,
);
let e = _mm512_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
- b,
- c,
);
let e = _mm512_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmsub_sh(a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmsub_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmsub_sh(a, 1, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
- let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmsub_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmsub_sh(1, a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_sqrt_sh(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_mask_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_sqrt_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
- );
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_mask_sqrt_sh(src, 1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
- );
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ unsafe fn test_mm_maskz_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_maskz_sqrt_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
- );
- let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_sqrt_sh(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
+ unsafe fn test_mm_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
+ let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fmaddsub_ph(a, b, c);
- let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r =
+ _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
- let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
+ let r =
+ _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
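+ // max takes the per-lane maximum; with a = 2.0 and b = 1.0 every computed
+ // lane is 2.0, while lanes with a clear mask bit fall back to src = 3.0 (or
+ // zero for maskz).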
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
- let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
+ unsafe fn test_mm_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_max_ph(a, b);
+ let e = _mm_set1_ph(2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
- let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
+ unsafe fn test_mm_mask_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let src = _mm_set1_ph(3.0);
+ let r = _mm_mask_max_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmaddsub_ph(a, b, c);
- let e = _mm256_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_maskz_max_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
- let e = _mm256_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- );
+ unsafe fn test_mm256_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_max_ph(a, b);
+ let e = _mm256_set1_ph(2.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
+ unsafe fn test_mm256_mask_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let src = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
+ unsafe fn test_mm256_maskz_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmaddsub_ph(a, b, c);
- let e = _mm512_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
- let e = _mm512_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- );
+ unsafe fn test_mm512_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_max_ph(a, b);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
+ unsafe fn test_mm512_mask_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
+ 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
+ unsafe fn test_mm512_maskz_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- );
+ unsafe fn test_mm512_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
a,
- 0b00110011001100110011001100110011,
b,
- c,
);
let e = _mm512_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
+ 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
- c,
- 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b00110011001100110011001100110011,
- a,
- b,
- c,
+ unsafe fn test_mm_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
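+        // Scalar `_sh` ops only compute lane 0 (max(1.0, 2.0) = 2.0); lanes 1..7 are copied from `a`.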
+ let r = _mm_max_sh(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_max_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_max_sh(src, 1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_max_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_max_sh(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let e = _mm512_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fmsubadd_ph(a, b, c);
- let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
- let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r =
+ _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
- let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
+ unsafe fn test_mm_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_min_ph(a, b);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
- let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
+ unsafe fn test_mm_mask_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let src = _mm_set1_ph(3.0);
+ let r = _mm_mask_min_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmsubadd_ph(a, b, c);
- let e = _mm256_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_maskz_min_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
- let e = _mm256_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- );
+ unsafe fn test_mm256_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_min_ph(a, b);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
+ unsafe fn test_mm256_mask_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let src = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
+ unsafe fn test_mm256_maskz_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsubadd_ph(a, b, c);
- let e = _mm512_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- );
+ unsafe fn test_mm512_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_min_ph(a, b);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
+ unsafe fn test_mm512_mask_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
+ unsafe fn test_mm512_maskz_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
- let e = _mm512_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
- );
+ unsafe fn test_mm512_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ unsafe fn test_mm512_mask_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
let e = _mm512_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
- 0b00110011001100110011001100110011,
b,
- c,
);
let e = _mm512_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a,
- b,
- c,
- 0b00110011001100110011001100110011,
- );
- let e = _mm512_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
- );
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_min_sh(a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_min_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_min_sh(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_min_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_min_sh(1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b00110011001100110011001100110011,
- a,
- b,
- c,
+ unsafe fn test_mm_mask_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let e = _mm512_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r =
+ _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_rcp_ph() {
- let a = _mm_set1_ph(2.0);
- let r = _mm_rcp_ph(a);
- let e = _mm_set1_ph(0.5);
+ unsafe fn test_mm_getexp_ph() {
+ let a = _mm_set1_ph(3.0);
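+        // getexp returns the unbiased exponent as a float: floor(log2(3.0)) = 1.0.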
+ let r = _mm_getexp_ph(a);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_rcp_ph() {
- let a = _mm_set1_ph(2.0);
- let src = _mm_set1_ph(1.0);
- let r = _mm_mask_rcp_ph(src, 0b01010101, a);
- let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
+ unsafe fn test_mm_mask_getexp_ph() {
+ let a = _mm_set1_ph(3.0);
+ let src = _mm_set1_ph(4.0);
+ let r = _mm_mask_getexp_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_rcp_ph() {
- let a = _mm_set1_ph(2.0);
- let r = _mm_maskz_rcp_ph(0b01010101, a);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ unsafe fn test_mm_maskz_getexp_ph() {
+ let a = _mm_set1_ph(3.0);
+ let r = _mm_maskz_getexp_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_rcp_ph() {
- let a = _mm256_set1_ph(2.0);
- let r = _mm256_rcp_ph(a);
- let e = _mm256_set1_ph(0.5);
+ unsafe fn test_mm256_getexp_ph() {
+ let a = _mm256_set1_ph(3.0);
+ let r = _mm256_getexp_ph(a);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_rcp_ph() {
- let a = _mm256_set1_ph(2.0);
- let src = _mm256_set1_ph(1.0);
- let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
+ unsafe fn test_mm256_mask_getexp_ph() {
+ let a = _mm256_set1_ph(3.0);
+ let src = _mm256_set1_ph(4.0);
+ let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_rcp_ph() {
- let a = _mm256_set1_ph(2.0);
- let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
+ unsafe fn test_mm256_maskz_getexp_ph() {
+ let a = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_getexp_ph(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_rcp_ph() {
- let a = _mm512_set1_ph(2.0);
- let r = _mm512_rcp_ph(a);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_getexp_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let r = _mm512_getexp_ph(a);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_rcp_ph() {
- let a = _mm512_set1_ph(2.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_mask_getexp_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let src = _mm512_set1_ph(4.0);
+ let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
- 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
+ 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_rcp_ph() {
- let a = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_maskz_getexp_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_rcp_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_rcp_sh(a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm512_getexp_round_ph() {
+ let a = _mm512_set1_ph(3.0);
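+        // _MM_FROUND_NO_EXC (SAE) only suppresses floating-point exceptions; the result is unchanged.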
+ let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_getexp_round_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let src = _mm512_set1_ph(4.0);
+ let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_ph(
+ 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
+ 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_getexp_round_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_getexp_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
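+        // The scalar form computes getexp(b[0]) = getexp(3.0) = 1.0 and copies lanes 1..7 from a.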
+ let r = _mm_getexp_sh(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_rcp_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_rcp_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_getexp_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getexp_sh(src, 0, a, b);
+ let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_rcp_sh(src, 1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_getexp_sh(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_rcp_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_maskz_rcp_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_getexp_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getexp_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_rcp_sh(1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_getexp_sh(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_getexp_round_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_getexp_round_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b);
+ let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_getexp_round_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_rsqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_rsqrt_ph(a);
- let e = _mm_set1_ph(0.5);
+ unsafe fn test_mm_getmant_ph() {
+ let a = _mm_set1_ph(10.0);
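+        // _MM_MANT_NORM_P75_1P5 normalizes the mantissa into [0.75, 1.5): 10.0 = 1.25 * 2^3, so the result is 1.25.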
+ let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
+ let e = _mm_set1_ph(1.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_rsqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let src = _mm_set1_ph(1.0);
- let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
- let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
+ unsafe fn test_mm_mask_getmant_ph() {
+ let a = _mm_set1_ph(10.0);
+ let src = _mm_set1_ph(20.0);
+ let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a);
+ let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_rsqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_maskz_rsqrt_ph(0b01010101, a);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ unsafe fn test_mm_maskz_getmant_ph() {
+ let a = _mm_set1_ph(10.0);
+ let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a);
+ let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_rsqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_rsqrt_ph(a);
- let e = _mm256_set1_ph(0.5);
+ unsafe fn test_mm256_getmant_ph() {
+ let a = _mm256_set1_ph(10.0);
+ let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
+ let e = _mm256_set1_ph(1.25);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_rsqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let src = _mm256_set1_ph(1.0);
- let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
+ unsafe fn test_mm256_mask_getmant_ph() {
+ let a = _mm256_set1_ph(10.0);
+ let src = _mm256_set1_ph(20.0);
+ let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ src,
+ 0b0101010101010101,
+ a,
+ );
let e = _mm256_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_rsqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
+ unsafe fn test_mm256_maskz_getmant_ph() {
+ let a = _mm256_set1_ph(10.0);
+ let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ 0b0101010101010101,
+ a,
+ );
let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_rsqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_rsqrt_ph(a);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_getmant_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
+ let e = _mm512_set1_ph(1.25);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_rsqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_mask_getmant_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let src = _mm512_set1_ph(20.0);
+ let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
- 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_rsqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_maskz_getmant_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_rsqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_rsqrt_sh(a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm512_getmant_round_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r =
+ _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
+ a,
+ );
+ let e = _mm512_set1_ph(1.25);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_getmant_round_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let src = _mm512_set1_ph(20.0);
+ let r = _mm512_mask_getmant_round_ph::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_getmant_round_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r = _mm512_maskz_getmant_round_ph::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_getmant_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_rsqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_rsqrt_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_getmant_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b);
+ let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_rsqrt_sh(src, 1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_rsqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_maskz_rsqrt_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_getmant_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_rsqrt_sh(1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_sqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_sqrt_ph(a);
- let e = _mm_set1_ph(2.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_getmant_round_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
+ a, b,
+ );
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_getmant_round_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(src, 0, a, b);
+ let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(src, 1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_getmant_round_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_sqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let src = _mm_set1_ph(1.0);
- let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
- let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
+ unsafe fn test_mm_roundscale_ph() {
+ let a = _mm_set1_ph(1.1);
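+        // IMM8 = 0 keeps zero fraction bits and rounds to nearest, i.e. 1.1 rounds to 1.0.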
+ let r = _mm_roundscale_ph::<0>(a);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_sqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_maskz_sqrt_ph(0b01010101, a);
- let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
+ unsafe fn test_mm_mask_roundscale_ph() {
+ let a = _mm_set1_ph(1.1);
+ let src = _mm_set1_ph(2.0);
+ let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a);
+ let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_roundscale_ph() {
+ let a = _mm_set1_ph(1.1);
+ let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_sqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_sqrt_ph(a);
- let e = _mm256_set1_ph(2.0);
+ unsafe fn test_mm256_roundscale_ph() {
+ let a = _mm256_set1_ph(1.1);
+ let r = _mm256_roundscale_ph::<0>(a);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_sqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let src = _mm256_set1_ph(1.0);
- let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
+ unsafe fn test_mm256_mask_roundscale_ph() {
+ let a = _mm256_set1_ph(1.1);
+ let src = _mm256_set1_ph(2.0);
+ let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_sqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
+ unsafe fn test_mm256_maskz_roundscale_ph() {
+ let a = _mm256_set1_ph(1.1);
+ let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_sqrt_ph(a);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_roundscale_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_roundscale_ph::<0>(a);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_mask_roundscale_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
- 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_maskz_roundscale_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sqrt_round_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_roundscale_round_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sqrt_round_ph() {
- let a = _mm512_set1_ph(4.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_roundscale_round_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
src,
0b01010101010101010101010101010101,
a,
);
let e = _mm512_set_ph(
- 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
- 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sqrt_round_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_roundscale_round_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
0b01010101010101010101010101010101,
a,
);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_sqrt_sh(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_roundscale_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_roundscale_sh::<0>(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_sqrt_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_roundscale_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_sqrt_sh(src, 1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_maskz_sqrt_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_roundscale_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_roundscale_sh::<0>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_sqrt_sh(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_roundscale_sh::<0>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sqrt_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_roundscale_round_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sqrt_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_roundscale_round_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sqrt_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r =
- _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_roundscale_round_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r =
- _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_max_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_max_ph(a, b);
- let e = _mm_set1_ph(2.0);
+ unsafe fn test_mm_scalef_ph() {
+ let a = _mm_set1_ph(1.);
+ let b = _mm_set1_ph(3.);
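+        // scalef computes a * 2^floor(b) = 1.0 * 2^3 = 8.0.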
+ let r = _mm_scalef_ph(a, b);
+ let e = _mm_set1_ph(8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_max_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let src = _mm_set1_ph(3.0);
- let r = _mm_mask_max_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0);
+ unsafe fn test_mm_mask_scalef_ph() {
+ let a = _mm_set1_ph(1.);
+ let b = _mm_set1_ph(3.);
+ let src = _mm_set1_ph(2.);
+ let r = _mm_mask_scalef_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_max_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_maskz_max_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
+ unsafe fn test_mm_maskz_scalef_ph() {
+ let a = _mm_set1_ph(1.);
+ let b = _mm_set1_ph(3.);
+ let r = _mm_maskz_scalef_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_max_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_max_ph(a, b);
- let e = _mm256_set1_ph(2.0);
+ unsafe fn test_mm256_scalef_ph() {
+ let a = _mm256_set1_ph(1.);
+ let b = _mm256_set1_ph(3.);
+ let r = _mm256_scalef_ph(a, b);
+ let e = _mm256_set1_ph(8.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_max_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let src = _mm256_set1_ph(3.0);
- let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b);
+ unsafe fn test_mm256_mask_scalef_ph() {
+ let a = _mm256_set1_ph(1.);
+ let b = _mm256_set1_ph(3.);
+ let src = _mm256_set1_ph(2.);
+ let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
+ 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_max_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
+ unsafe fn test_mm256_maskz_scalef_ph() {
+ let a = _mm256_set1_ph(1.);
+ let b = _mm256_set1_ph(3.);
+ let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_max_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_max_ph(a, b);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_scalef_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let r = _mm512_scalef_ph(a, b);
+ let e = _mm512_set1_ph(8.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_max_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_mask_scalef_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let src = _mm512_set1_ph(2.);
+ let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
- 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
+ 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
+ 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_max_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_maskz_scalef_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
+ 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_max_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_scalef_round_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(8.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_max_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_scalef_round_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let src = _mm512_set1_ph(2.);
+ let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
- 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
+ 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
+ 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_max_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_scalef_round_ph() {
+ let a = _mm512_set1_ph(1.);
+ let b = _mm512_set1_ph(3.);
+ let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
+ 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_max_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_max_sh(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_scalef_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_scalef_sh(a, b);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_max_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_max_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_scalef_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_scalef_sh(src, 0, a, b);
+ let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_max_sh(src, 1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_scalef_sh(src, 1, a, b);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_max_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_maskz_max_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_scalef_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_scalef_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_max_sh(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_scalef_sh(1, a, b);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_max_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_scalef_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_max_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_scalef_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 1, a, b,
);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_max_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ unsafe fn test_mm_maskz_scalef_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
let r =
- _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
let r =
- _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_min_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_min_ph(a, b);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm_reduce_ph() {
+ let a = _mm_set1_ph(1.25);
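+ // IMM8 bits 7:4 = 1 keeps one fraction bit; VREDUCEPH returns a minus a rounded
+ // (toward zero) to that precision: 1.25 - 1.0 = 0.25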
+ let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
+ let e = _mm_set1_ph(0.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_min_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let src = _mm_set1_ph(3.0);
- let r = _mm_mask_min_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
+ unsafe fn test_mm_mask_reduce_ph() {
+ let a = _mm_set1_ph(1.25);
+ let src = _mm_set1_ph(2.0);
+ let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a);
+ let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_min_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_maskz_min_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ unsafe fn test_mm_maskz_reduce_ph() {
+ let a = _mm_set1_ph(1.25);
+ let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a);
+ let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_min_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_min_ph(a, b);
- let e = _mm256_set1_ph(1.0);
+ unsafe fn test_mm256_reduce_ph() {
+ let a = _mm256_set1_ph(1.25);
+ let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
+ let e = _mm256_set1_ph(0.25);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_min_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let src = _mm256_set1_ph(3.0);
- let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b);
+ unsafe fn test_mm256_mask_reduce_ph() {
+ let a = _mm256_set1_ph(1.25);
+ let src = _mm256_set1_ph(2.0);
+ let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_min_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
+ unsafe fn test_mm256_maskz_reduce_ph() {
+ let a = _mm256_set1_ph(1.25);
+ let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_min_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_min_ph(a, b);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_reduce_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
+ let e = _mm512_set1_ph(0.25);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_min_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_mask_reduce_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_min_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_maskz_reduce_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_min_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_reduce_round_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set1_ph(0.25);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_min_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_reduce_round_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
src,
0b01010101010101010101010101010101,
a,
- b,
);
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_min_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_reduce_round_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
0b01010101010101010101010101010101,
a,
- b,
);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_min_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_min_sh(a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_reduce_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_min_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_min_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_reduce_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b);
+ let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_min_sh(src, 1, a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_min_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_maskz_min_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_reduce_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_min_sh(1, a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_min_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_reduce_round_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_min_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_reduce_round_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
src, 0, a, b,
);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
src, 1, a, b,
);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_min_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ unsafe fn test_mm_maskz_reduce_round_sh() {
+ let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
let r =
- _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
let r =
- _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b);
+ let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_getexp_ph() {
- let a = _mm_set1_ph(3.0);
- let r = _mm_getexp_ph(a);
- let e = _mm_set1_ph(1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_reduce_add_ph() {
+ let a = _mm_set1_ph(2.0);
+ let r = _mm_reduce_add_ph(a);
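+ // horizontal sum of the eight f16 lanes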
+ assert_eq!(r, 16.0);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_getexp_ph() {
- let a = _mm_set1_ph(3.0);
- let src = _mm_set1_ph(4.0);
- let r = _mm_mask_getexp_ph(src, 0b01010101, a);
- let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm256_reduce_add_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_reduce_add_ph(a);
+ assert_eq!(r, 32.0);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_getexp_ph() {
- let a = _mm_set1_ph(3.0);
- let r = _mm_maskz_getexp_ph(0b01010101, a);
- let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_reduce_add_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_reduce_add_ph(a);
+ assert_eq!(r, 64.0);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_getexp_ph() {
- let a = _mm256_set1_ph(3.0);
- let r = _mm256_getexp_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_reduce_mul_ph() {
+ let a = _mm_set1_ph(2.0);
+ let r = _mm_reduce_mul_ph(a);
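+ // horizontal product of the eight lanes: 2.0^8 = 256.0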
+ assert_eq!(r, 256.0);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_getexp_ph() {
- let a = _mm256_set1_ph(3.0);
- let src = _mm256_set1_ph(4.0);
- let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a);
- let e = _mm256_set_ph(
- 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm256_reduce_mul_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_reduce_mul_ph(a);
+ assert_eq!(r, 65536.0);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_getexp_ph() {
- let a = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_getexp_ph(0b0101010101010101, a);
- let e = _mm256_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
- );
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_reduce_mul_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_reduce_mul_ph(a);
+ assert_eq!(r, 16777216.0);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_getexp_ph() {
- let a = _mm512_set1_ph(3.0);
- let r = _mm512_getexp_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_reduce_max_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_reduce_max_ph(a);
+ assert_eq!(r, 8.0);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_getexp_ph() {
- let a = _mm512_set1_ph(3.0);
- let src = _mm512_set1_ph(4.0);
- let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a);
- let e = _mm512_set_ph(
- 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
- 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_reduce_max_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- assert_eq_m512h(r, e);
+ let r = _mm256_reduce_max_ph(a);
+ assert_eq!(r, 16.0);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_getexp_ph() {
- let a = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a);
- let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ unsafe fn test_mm512_reduce_max_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- assert_eq_m512h(r, e);
+ let r = _mm512_reduce_max_ph(a);
+ assert_eq!(r, 32.0);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_getexp_round_ph() {
- let a = _mm512_set1_ph(3.0);
- let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_reduce_min_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_reduce_min_ph(a);
+ assert_eq!(r, 1.0);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_getexp_round_ph() {
- let a = _mm512_set1_ph(3.0);
- let src = _mm512_set1_ph(4.0);
- let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>(
- src,
- 0b01010101010101010101010101010101,
- a,
- );
- let e = _mm512_set_ph(
- 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
- 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_reduce_min_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- assert_eq_m512h(r, e);
+ let r = _mm256_reduce_min_ph(a);
+ assert_eq!(r, 1.0);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_getexp_round_ph() {
- let a = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>(
- 0b01010101010101010101010101010101,
- a,
+ unsafe fn test_mm512_reduce_min_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ let r = _mm512_reduce_min_ph(a);
+ assert_eq!(r, 1.0);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_fpclass_ph_mask() {
+ let a = _mm_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
);
- assert_eq_m512h(r, e);
+ let r = _mm_fpclass_ph_mask::<0x18>(a); // infinities
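+ // 0x18 = positive infinity (0x08) | negative infinity (0x10)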
+ assert_eq!(r, 0b01100000);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_getexp_sh() {
- let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_getexp_sh(a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_fpclass_ph_mask() {
+ let a = _mm_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ );
+ let r = _mm_mask_fpclass_ph_mask::<0x18>(0b01010101, a);
+ assert_eq!(r, 0b01000000);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_getexp_sh() {
- let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_getexp_sh(src, 0, a, b);
- let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_getexp_sh(src, 1, a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fpclass_ph_mask() {
+ let a = _mm256_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ );
+ let r = _mm256_fpclass_ph_mask::<0x18>(a); // infinities
+ assert_eq!(r, 0b0110000001100000);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_getexp_sh() {
- let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_maskz_getexp_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_getexp_sh(1, a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_fpclass_ph_mask() {
+ let a = _mm256_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ );
+ let r = _mm256_mask_fpclass_ph_mask::<0x18>(0b0101010101010101, a);
+ assert_eq!(r, 0b0100000001000000);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_getexp_round_sh() {
- let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_fpclass_ph_mask() {
+ let a = _mm512_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ );
+ let r = _mm512_fpclass_ph_mask::<0x18>(a); // infinities
+ assert_eq!(r, 0b01100000011000000110000001100000);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_getexp_round_sh() {
- let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b);
- let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_mask_fpclass_ph_mask() {
+ let a = _mm512_set_ph(
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ 1.,
+ f16::INFINITY,
+ f16::NEG_INFINITY,
+ 0.0,
+ -0.0,
+ -2.0,
+ f16::NAN,
+ 5.9e-8, // Denormal
+ );
+ let r = _mm512_mask_fpclass_ph_mask::<0x18>(0b01010101010101010101010101010101, a);
+ assert_eq!(r, 0b01000000010000000100000001000000);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_getexp_round_sh() {
- let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_getmant_ph() {
- let a = _mm_set1_ph(10.0);
- let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
- let e = _mm_set1_ph(1.25);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_fpclass_sh_mask() {
+ let a = _mm_set_sh(f16::INFINITY);
+ let r = _mm_fpclass_sh_mask::<0x18>(a);
+ assert_eq!(r, 1);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_getmant_ph() {
- let a = _mm_set1_ph(10.0);
- let src = _mm_set1_ph(20.0);
- let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a);
- let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_fpclass_sh_mask() {
+ let a = _mm_set_sh(f16::INFINITY);
+ let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a);
+ assert_eq!(r, 0);
+ let r = _mm_mask_fpclass_sh_mask::<0x18>(1, a);
+ assert_eq!(r, 1);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_getmant_ph() {
- let a = _mm_set1_ph(10.0);
- let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a);
- let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25);
+ unsafe fn test_mm_mask_blend_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0);
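+ // each set mask bit takes the lane from b, each clear bit keeps the lane from a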
+ let r = _mm_mask_blend_ph(0b01010101, a, b);
+ let e = _mm_set_ph(1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_getmant_ph() {
- let a = _mm256_set1_ph(10.0);
- let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
- let e = _mm256_set1_ph(1.25);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_getmant_ph() {
- let a = _mm256_set1_ph(10.0);
- let src = _mm256_set1_ph(20.0);
- let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
- src,
- 0b0101010101010101,
- a,
- );
- let e = _mm256_set_ph(
- 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
- 20.0, 1.25,
+ unsafe fn test_mm256_mask_blend_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_getmant_ph() {
- let a = _mm256_set1_ph(10.0);
- let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
- 0b0101010101010101,
- a,
+ let b = _mm256_set_ph(
+ -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
+ -14.0, -15.0, -16.0,
);
+ let r = _mm256_mask_blend_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
+ -16.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_getmant_ph() {
- let a = _mm512_set1_ph(10.0);
- let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
- let e = _mm512_set1_ph(1.25);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_getmant_ph() {
- let a = _mm512_set1_ph(10.0);
- let src = _mm512_set1_ph(20.0);
- let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
- src,
- 0b01010101010101010101010101010101,
- a,
- );
- let e = _mm512_set_ph(
- 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
- 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
- 20.0, 1.25, 20.0, 1.25,
+ unsafe fn test_mm512_mask_blend_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_getmant_ph() {
- let a = _mm512_set1_ph(10.0);
- let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
- 0b01010101010101010101010101010101,
- a,
+ let b = _mm512_set_ph(
+ -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
+ -14.0, -15.0, -16.0, -17.0, -18.0, -19.0, -20.0, -21.0, -22.0, -23.0, -24.0, -25.0,
+ -26.0, -27.0, -28.0, -29.0, -30.0, -31.0, -32.0,
);
+ let r = _mm512_mask_blend_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
- 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
+ -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, 27.0, -28.0,
+ 29.0, -30.0, 31.0, -32.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_getmant_round_ph() {
- let a = _mm512_set1_ph(10.0);
- let r =
- _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
- a,
- );
- let e = _mm512_set1_ph(1.25);
- assert_eq_m512h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_permutex2var_ph() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14);
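+ // indices 0-7 select lanes of a, indices 8-15 select lanes of b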
+ let r = _mm_permutex2var_ph(a, idx, b);
+ let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_getmant_round_ph() {
- let a = _mm512_set1_ph(10.0);
- let src = _mm512_set1_ph(20.0);
- let r = _mm512_mask_getmant_round_ph::<
- _MM_MANT_NORM_P75_1P5,
- _MM_MANT_SIGN_NAN,
- _MM_FROUND_NO_EXC,
- >(src, 0b01010101010101010101010101010101, a);
- let e = _mm512_set_ph(
- 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
- 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
- 20.0, 1.25, 20.0, 1.25,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_permutex2var_ph() {
+ let a = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- assert_eq_m512h(r, e);
+ let b = _mm256_setr_ph(
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let idx = _mm256_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let r = _mm256_permutex2var_ph(a, idx, b);
+ let e = _mm256_setr_ph(
+ 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
+ 31.0,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_getmant_round_ph() {
- let a = _mm512_set1_ph(10.0);
- let r = _mm512_maskz_getmant_round_ph::<
- _MM_MANT_NORM_P75_1P5,
- _MM_MANT_SIGN_NAN,
- _MM_FROUND_NO_EXC,
- >(0b01010101010101010101010101010101, a);
- let e = _mm512_set_ph(
- 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
- 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ unsafe fn test_mm512_permutex2var_ph() {
+ let a = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_setr_ph(
+ 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0,
+ 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
+ 61.0, 62.0, 63.0, 64.0,
+ );
+ let idx = _mm512_set_epi16(
+ 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20,
+ 18, 16, 14, 12, 10, 8, 6, 4, 2, 0,
+ );
+ let r = _mm512_permutex2var_ph(a, idx, b);
+ let e = _mm512_setr_ph(
+ 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
+ 31.0, 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0, 49.0, 51.0, 53.0, 55.0, 57.0,
+ 59.0, 61.0, 63.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_getmant_sh() {
- let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b);
- let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_getmant_sh() {
- let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
- let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b);
- let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b);
- let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_getmant_sh() {
- let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b);
- let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_permutexvar_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7);
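+ // result lane i is a[idx[i]]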
+ let r = _mm_permutexvar_ph(idx, a);
+ let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_getmant_round_sh() {
- let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
- a, b,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_permutexvar_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_getmant_round_sh() {
- let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
- let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_getmant_round_sh::<
- _MM_MANT_NORM_P75_1P5,
- _MM_MANT_SIGN_NAN,
- _MM_FROUND_NO_EXC,
- >(src, 0, a, b);
- let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_getmant_round_sh::<
- _MM_MANT_NORM_P75_1P5,
- _MM_MANT_SIGN_NAN,
- _MM_FROUND_NO_EXC,
- >(src, 1, a, b);
- let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ let idx = _mm256_set_epi16(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
+ let r = _mm256_permutexvar_ph(idx, a);
+ let e = _mm256_setr_ph(
+ 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_getmant_round_sh() {
- let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_maskz_getmant_round_sh::<
- _MM_MANT_NORM_P75_1P5,
- _MM_MANT_SIGN_NAN,
- _MM_FROUND_NO_EXC,
- >(0, a, b);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_getmant_round_sh::<
- _MM_MANT_NORM_P75_1P5,
- _MM_MANT_SIGN_NAN,
- _MM_FROUND_NO_EXC,
- >(1, a, b);
- let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_permutexvar_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let idx = _mm512_set_epi16(
+ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31,
+ );
+ let r = _mm512_permutexvar_ph(idx, a);
+ let e = _mm512_setr_ph(
+ 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
+ 31.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0,
+ 30.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_roundscale_ph() {
- let a = _mm_set1_ph(1.1);
- let r = _mm_roundscale_ph::<0>(a);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm_cvtepi16_ph() {
+ let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm_cvtepi16_ph(a);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_roundscale_ph() {
- let a = _mm_set1_ph(1.1);
- let src = _mm_set1_ph(2.0);
- let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a);
- let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
+ unsafe fn test_mm_mask_cvtepi16_ph() {
+ let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_cvtepi16_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_roundscale_ph() {
- let a = _mm_set1_ph(1.1);
- let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a);
- let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ unsafe fn test_mm_maskz_cvtepi16_ph() {
+ let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm_maskz_cvtepi16_ph(0b01010101, a);
+ let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_roundscale_ph() {
- let a = _mm256_set1_ph(1.1);
- let r = _mm256_roundscale_ph::<0>(a);
- let e = _mm256_set1_ph(1.0);
+ unsafe fn test_mm256_cvtepi16_ph() {
+ let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm256_cvtepi16_ph(a);
+ let e = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_roundscale_ph() {
- let a = _mm256_set1_ph(1.1);
- let src = _mm256_set1_ph(2.0);
- let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a);
+ unsafe fn test_mm256_mask_cvtepi16_ph() {
+ let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let src = _mm256_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
+ );
+ let r = _mm256_mask_cvtepi16_ph(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
+ 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_roundscale_ph() {
- let a = _mm256_set1_ph(1.1);
- let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a);
+ unsafe fn test_mm256_maskz_cvtepi16_ph() {
+ let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm256_maskz_cvtepi16_ph(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16.,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_roundscale_ph() {
- let a = _mm512_set1_ph(1.1);
- let r = _mm512_roundscale_ph::<0>(a);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_cvtepi16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let r = _mm512_cvtepi16_ph(a);
+ let e = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_roundscale_ph() {
- let a = _mm512_set1_ph(1.1);
- let src = _mm512_set1_ph(2.0);
- let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_mask_cvtepi16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let src = _mm512_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
+ 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
+ );
+ let r = _mm512_mask_cvtepi16_ph(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
- 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
+ 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
+ 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_roundscale_ph() {
- let a = _mm512_set1_ph(1.1);
- let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_maskz_cvtepi16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let r = _mm512_maskz_cvtepi16_ph(0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
+ 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_roundscale_round_ph() {
- let a = _mm512_set1_ph(1.1);
- let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_cvt_roundepi16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let r = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
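+ // 1..=32 are all exactly representable in f16, so the rounding mode has no effect here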
+ let e = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_roundscale_round_ph() {
- let a = _mm512_set1_ph(1.1);
- let src = _mm512_set1_ph(2.0);
- let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
+ unsafe fn test_mm512_mask_cvt_roundepi16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let src = _mm512_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
+ 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
+ );
+ let r = _mm512_mask_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
0b01010101010101010101010101010101,
a,
);
let e = _mm512_set_ph(
- 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
- 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
+ 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
+ 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_roundscale_round_ph() {
- let a = _mm512_set1_ph(1.1);
- let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
+ unsafe fn test_mm512_maskz_cvt_roundepi16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let r = _mm512_maskz_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
+ 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_roundscale_sh() {
- let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_roundscale_sh::<0>(a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_roundscale_sh() {
- let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
- let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_roundscale_sh() {
- let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_maskz_roundscale_sh::<0>(0, a, b);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_roundscale_sh::<0>(1, a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_roundscale_round_sh() {
- let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_roundscale_round_sh() {
- let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
- let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_roundscale_round_sh() {
- let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_scalef_ph() {
- let a = _mm_set1_ph(1.);
- let b = _mm_set1_ph(3.);
- let r = _mm_scalef_ph(a, b);
- let e = _mm_set1_ph(8.0);
+ unsafe fn test_mm_cvtepu16_ph() {
+ let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm_cvtepu16_ph(a);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_scalef_ph() {
- let a = _mm_set1_ph(1.);
- let b = _mm_set1_ph(3.);
- let src = _mm_set1_ph(2.);
- let r = _mm_mask_scalef_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0);
+ unsafe fn test_mm_mask_cvtepu16_ph() {
+ let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_cvtepu16_ph(src, 0b01010101, a);
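+ // Mask bit i selects converted lane i; a cleared bit keeps lane i of `src` (note that `_mm_set_ph` lists the highest lane first, so lane 0 is the last argument).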
+ let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_scalef_ph() {
- let a = _mm_set1_ph(1.);
- let b = _mm_set1_ph(3.);
- let r = _mm_maskz_scalef_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0);
+ unsafe fn test_mm_maskz_cvtepu16_ph() {
+ let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm_maskz_cvtepu16_ph(0b01010101, a);
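+ // The maskz form zeroes every lane whose mask bit is clear instead of taking it from a source vector.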
+ let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_scalef_ph() {
- let a = _mm256_set1_ph(1.);
- let b = _mm256_set1_ph(3.);
- let r = _mm256_scalef_ph(a, b);
- let e = _mm256_set1_ph(8.0);
+ unsafe fn test_mm256_cvtepu16_ph() {
+ let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm256_cvtepu16_ph(a);
+ let e = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_scalef_ph() {
- let a = _mm256_set1_ph(1.);
- let b = _mm256_set1_ph(3.);
- let src = _mm256_set1_ph(2.);
- let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b);
+ unsafe fn test_mm256_mask_cvtepu16_ph() {
+ let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let src = _mm256_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
+ );
+ let r = _mm256_mask_cvtepu16_ph(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
+ 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_scalef_ph() {
- let a = _mm256_set1_ph(1.);
- let b = _mm256_set1_ph(3.);
- let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b);
+ unsafe fn test_mm256_maskz_cvtepu16_ph() {
+ let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm256_maskz_cvtepu16_ph(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
+ 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16.,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_scalef_ph() {
- let a = _mm512_set1_ph(1.);
- let b = _mm512_set1_ph(3.);
- let r = _mm512_scalef_ph(a, b);
- let e = _mm512_set1_ph(8.0);
+ unsafe fn test_mm512_cvtepu16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let r = _mm512_cvtepu16_ph(a);
+ let e = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_scalef_ph() {
- let a = _mm512_set1_ph(1.);
- let b = _mm512_set1_ph(3.);
- let src = _mm512_set1_ph(2.);
- let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_mask_cvtepu16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let src = _mm512_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
+ 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
+ );
+ let r = _mm512_mask_cvtepu16_ph(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
- 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
+ 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
+ 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_scalef_ph() {
- let a = _mm512_set1_ph(1.);
- let b = _mm512_set1_ph(3.);
- let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_maskz_cvtepu16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let r = _mm512_maskz_cvtepu16_ph(0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
- 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
+ 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
+ 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_scalef_round_ph() {
- let a = _mm512_set1_ph(1.);
- let b = _mm512_set1_ph(3.);
- let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(8.0);
+ unsafe fn test_mm512_cvt_roundepu16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let r = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_scalef_round_ph() {
- let a = _mm512_set1_ph(1.);
- let b = _mm512_set1_ph(3.);
- let src = _mm512_set1_ph(2.);
- let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_cvt_roundepu16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let src = _mm512_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
+ 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
+ );
+ let r = _mm512_mask_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
0b01010101010101010101010101010101,
a,
- b,
);
let e = _mm512_set_ph(
- 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
- 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
+ 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
+ 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_scalef_round_ph() {
- let a = _mm512_set1_ph(1.);
- let b = _mm512_set1_ph(3.);
- let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_cvt_roundepu16_ph() {
+ let a = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ let r = _mm512_maskz_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
- b,
);
let e = _mm512_set_ph(
- 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
- 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
+ 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
+ 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvtepi32_ph() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm_cvtepi32_ph(a);
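+ // The 128-bit narrowing conversion fills only the low four f16 lanes; the upper four lanes of the result are zeroed.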
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi32_ph() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_cvtepi32_ph(src, 0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepi32_ph() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm_maskz_cvtepi32_ph(0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvtepi32_ph() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm256_cvtepi32_ph(a);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi32_ph() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm256_mask_cvtepi32_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepi32_ph() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm256_maskz_cvtepi32_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtepi32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_cvtepi32_ph(a);
+ let e = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- assert_eq_m512h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_scalef_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_scalef_sh(a, b);
- let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_mask_cvtepi32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let src = _mm256_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
+ );
+ let r = _mm512_mask_cvtepi32_ph(src, 0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_scalef_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_scalef_sh(src, 0, a, b);
- let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_scalef_sh(src, 1, a, b);
- let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_maskz_cvtepi32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_scalef_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_maskz_scalef_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_scalef_sh(1, a, b);
- let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_cvt_roundepi32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_scalef_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_mask_cvt_roundepi32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let src = _mm256_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
+ );
+ let r = _mm512_mask_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b0101010101010101,
+ a,
+ );
+ let e = _mm256_set_ph(
+ 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_scalef_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
+ unsafe fn test_mm512_maskz_cvt_roundepi32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
+ a,
);
- let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
+ let e = _mm256_set_ph(
+ 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
);
- let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_scalef_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
- let r =
- _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_cvti32_sh() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvti32_sh(a, 10);
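+ // The scalar conversion replaces only lane 0 with the converted integer; lanes 1..7 are copied from `a`.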
+ let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m128h(r, e);
- let r =
- _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvt_roundi32_sh() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
+ let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_reduce_ph() {
- let a = _mm_set1_ph(1.25);
- let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
- let e = _mm_set1_ph(0.25);
+ unsafe fn test_mm_cvtepu32_ph() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm_cvtepu32_ph(a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_reduce_ph() {
- let a = _mm_set1_ph(1.25);
- let src = _mm_set1_ph(2.0);
- let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a);
- let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25);
+ unsafe fn test_mm_mask_cvtepu32_ph() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_cvtepu32_ph(src, 0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_reduce_ph() {
- let a = _mm_set1_ph(1.25);
- let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a);
- let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25);
+ unsafe fn test_mm_maskz_cvtepu32_ph() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm_maskz_cvtepu32_ph(0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_reduce_ph() {
- let a = _mm256_set1_ph(1.25);
- let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
- let e = _mm256_set1_ph(0.25);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm256_cvtepu32_ph() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm256_cvtepu32_ph(a);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_reduce_ph() {
- let a = _mm256_set1_ph(1.25);
- let src = _mm256_set1_ph(2.0);
- let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a);
- let e = _mm256_set_ph(
- 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm256_mask_cvtepu32_ph() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm256_mask_cvtepu32_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_reduce_ph() {
- let a = _mm256_set1_ph(1.25);
- let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a);
- let e = _mm256_set_ph(
- 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm256_maskz_cvtepu32_ph() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm256_maskz_cvtepu32_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_reduce_ph() {
- let a = _mm512_set1_ph(1.25);
- let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
- let e = _mm512_set1_ph(0.25);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm512_cvtepu32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_cvtepu32_ph(a);
+ let e = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_reduce_ph() {
- let a = _mm512_set1_ph(1.25);
- let src = _mm512_set1_ph(2.0);
- let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
- src,
- 0b01010101010101010101010101010101,
- a,
+ unsafe fn test_mm512_mask_cvtepu32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let src = _mm256_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
);
- let e = _mm512_set_ph(
- 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
- 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
+ let r = _mm512_mask_cvtepu32_ph(src, 0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0,
);
- assert_eq_m512h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_reduce_ph() {
- let a = _mm512_set1_ph(1.25);
- let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
- 0b01010101010101010101010101010101,
- a,
- );
- let e = _mm512_set_ph(
- 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
- 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
+ unsafe fn test_mm512_maskz_cvtepu32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
);
- assert_eq_m512h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_reduce_round_ph() {
- let a = _mm512_set1_ph(1.25);
- let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
- let e = _mm512_set1_ph(0.25);
- assert_eq_m512h(r, e);
+ unsafe fn test_mm512_cvt_roundepu32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_reduce_round_ph() {
- let a = _mm512_set1_ph(1.25);
- let src = _mm512_set1_ph(2.0);
- let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
+ unsafe fn test_mm512_mask_cvt_roundepu32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let src = _mm256_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
+ );
+ let r = _mm512_mask_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
);
- let e = _mm512_set_ph(
- 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
- 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
+ let e = _mm256_set_ph(
+ 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
+ 16.0,
);
- assert_eq_m512h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_reduce_round_ph() {
- let a = _mm512_set1_ph(1.25);
- let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_cvt_roundepu32_ph() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
);
- let e = _mm512_set_ph(
- 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
- 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
+ let e = _mm256_set_ph(
+ 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_reduce_sh() {
- let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
- let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_reduce_sh() {
- let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
- let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b);
- let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b);
- let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_reduce_sh() {
- let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b);
- let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_cvtu32_sh() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvtu32_sh(a, 10);
+ let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_reduce_round_sh() {
- let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
- let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
- let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_cvt_roundu32_sh() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
+ let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_reduce_round_sh() {
- let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
- let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
- src, 1, a, b,
- );
- let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvtepi64_ph() {
+ let a = _mm_set_epi64x(1, 2);
+ let r = _mm_cvtepi64_ph(a);
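+ // Two i64 elements convert into the two lowest f16 lanes; the remaining six lanes are zeroed.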
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_reduce_round_sh() {
- let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
- let r =
- _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r =
- _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b);
- let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi64_ph() {
+ let a = _mm_set_epi64x(1, 2);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_cvtepi64_ph(src, 0b01, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_reduce_add_ph() {
- let a = _mm_set1_ph(2.0);
- let r = _mm_reduce_add_ph(a);
- assert_eq!(r, 16.0);
+ unsafe fn test_mm_maskz_cvtepi64_ph() {
+ let a = _mm_set_epi64x(1, 2);
+ let r = _mm_maskz_cvtepi64_ph(0b01, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_reduce_add_ph() {
- let a = _mm256_set1_ph(2.0);
- let r = _mm256_reduce_add_ph(a);
- assert_eq!(r, 32.0);
+ unsafe fn test_mm256_cvtepi64_ph() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let r = _mm256_cvtepi64_ph(a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_reduce_add_ph() {
- let a = _mm512_set1_ph(2.0);
- let r = _mm512_reduce_add_ph(a);
- assert_eq!(r, 64.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi64_ph() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm256_mask_cvtepi64_ph(src, 0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_reduce_mul_ph() {
- let a = _mm_set1_ph(2.0);
- let r = _mm_reduce_mul_ph(a);
- assert_eq!(r, 256.0);
+ unsafe fn test_mm256_maskz_cvtepi64_ph() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let r = _mm256_maskz_cvtepi64_ph(0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_reduce_mul_ph() {
- let a = _mm256_set1_ph(2.0);
- let r = _mm256_reduce_mul_ph(a);
- assert_eq!(r, 65536.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtepi64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_cvtepi64_ph(a);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_reduce_mul_ph() {
- let a = _mm512_set1_ph(2.0);
- let r = _mm512_reduce_mul_ph(a);
- assert_eq!(r, 16777216.0);
+ unsafe fn test_mm512_mask_cvtepi64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm512_mask_cvtepi64_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_reduce_max_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let r = _mm_reduce_max_ph(a);
- assert_eq!(r, 8.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtepi64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_maskz_cvtepi64_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_reduce_max_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvt_roundepi64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvt_roundepi64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm512_mask_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0b01010101, a,
);
- let r = _mm256_reduce_max_ph(a);
- assert_eq!(r, 16.0);
+ let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_reduce_max_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ unsafe fn test_mm512_maskz_cvt_roundepi64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101, a,
);
- let r = _mm512_reduce_max_ph(a);
- assert_eq!(r, 32.0);
+ let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_reduce_min_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let r = _mm_reduce_min_ph(a);
- assert_eq!(r, 1.0);
+ unsafe fn test_mm_cvtepu64_ph() {
+ let a = _mm_set_epi64x(1, 2);
+ let r = _mm_cvtepu64_ph(a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_reduce_min_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let r = _mm256_reduce_min_ph(a);
- assert_eq!(r, 1.0);
+ unsafe fn test_mm_mask_cvtepu64_ph() {
+ let a = _mm_set_epi64x(1, 2);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_cvtepu64_ph(src, 0b01, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_reduce_min_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let r = _mm512_reduce_min_ph(a);
- assert_eq!(r, 1.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepu64_ph() {
+ let a = _mm_set_epi64x(1, 2);
+ let r = _mm_maskz_cvtepu64_ph(0b01, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fpclass_ph_mask() {
- let a = _mm_set_ph(
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- );
- let r = _mm_fpclass_ph_mask::<0x18>(a); // infinities
- assert_eq!(r, 0b01100000);
+ unsafe fn test_mm256_cvtepu64_ph() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let r = _mm256_cvtepu64_ph(a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fpclass_ph_mask() {
- let a = _mm_set_ph(
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- );
- let r = _mm_mask_fpclass_ph_mask::<0x18>(0b01010101, a);
- assert_eq!(r, 0b01000000);
+ unsafe fn test_mm256_mask_cvtepu64_ph() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm256_mask_cvtepu64_ph(src, 0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fpclass_ph_mask() {
- let a = _mm256_set_ph(
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- );
- let r = _mm256_fpclass_ph_mask::<0x18>(a); // infinities
- assert_eq!(r, 0b0110000001100000);
+ unsafe fn test_mm256_maskz_cvtepu64_ph() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let r = _mm256_maskz_cvtepu64_ph(0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fpclass_ph_mask() {
- let a = _mm256_set_ph(
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- );
- let r = _mm256_mask_fpclass_ph_mask::<0x18>(0b0101010101010101, a);
- assert_eq!(r, 0b0100000001000000);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtepu64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_cvtepu64_ph(a);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fpclass_ph_mask() {
- let a = _mm512_set_ph(
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
+ unsafe fn test_mm512_mask_cvtepu64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm512_mask_cvtepu64_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtepu64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_maskz_cvtepu64_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvt_roundepu64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvt_roundepu64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm512_mask_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0b01010101, a,
);
- let r = _mm512_fpclass_ph_mask::<0x18>(a); // infinities
- assert_eq!(r, 0b01100000011000000110000001100000);
+ let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fpclass_ph_mask() {
- let a = _mm512_set_ph(
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
- 1.,
- f16::INFINITY,
- f16::NEG_INFINITY,
- 0.0,
- -0.0,
- -2.0,
- f16::NAN,
- 5.9e-8, // Denormal
+ unsafe fn test_mm512_maskz_cvt_roundepu64_ph() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101, a,
);
- let r = _mm512_mask_fpclass_ph_mask::<0x18>(0b01010101010101010101010101010101, a);
- assert_eq!(r, 0b01000000010000000100000001000000);
+ let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvtxps_ph() {
+ let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
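+ // The "x" in cvtxps_ph marks the __m128h-returning FP16 intrinsic, presumably to avoid clashing with the F16C _mm_cvtps_ph, which packs its result into an __m128i.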
+ let r = _mm_cvtxps_ph(a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fpclass_sh_mask() {
- let a = _mm_set_sh(f16::INFINITY);
- let r = _mm_fpclass_sh_mask::<0x18>(a);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvtxps_ph() {
+ let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_cvtxps_ph(src, 0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16., 4.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fpclass_sh_mask() {
- let a = _mm_set_sh(f16::INFINITY);
- let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a);
- assert_eq!(r, 0);
- let r = _mm_mask_fpclass_sh_mask::<0x18>(1, a);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvtxps_ph() {
+ let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
+ let r = _mm_maskz_cvtxps_ph(0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_blend_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0);
- let r = _mm_mask_blend_ph(0b01010101, a, b);
- let e = _mm_set_ph(1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0);
+ unsafe fn test_mm256_cvtxps_ph() {
+ let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_cvtxps_ph(a);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_blend_ph() {
- let a = _mm256_set_ph(
+ unsafe fn test_mm256_mask_cvtxps_ph() {
+ let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm256_mask_cvtxps_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtxps_ph() {
+ let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_maskz_cvtxps_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtxps_ph() {
+ let a = _mm512_set_ps(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let b = _mm256_set_ph(
- -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
- -14.0, -15.0, -16.0,
- );
- let r = _mm256_mask_blend_ph(0b0101010101010101, a, b);
+ let r = _mm512_cvtxps_ph(a);
let e = _mm256_set_ph(
- 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
- -16.0,
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_blend_ph() {
- let a = _mm512_set_ph(
+ unsafe fn test_mm512_mask_cvtxps_ph() {
+ let a = _mm512_set_ps(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
);
- let b = _mm512_set_ph(
- -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
- -14.0, -15.0, -16.0, -17.0, -18.0, -19.0, -20.0, -21.0, -22.0, -23.0, -24.0, -25.0,
- -26.0, -27.0, -28.0, -29.0, -30.0, -31.0, -32.0,
+ let src = _mm256_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
);
- let r = _mm512_mask_blend_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
- -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, 27.0, -28.0,
- 29.0, -30.0, 31.0, -32.0,
+ let r = _mm512_mask_cvtxps_ph(src, 0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0,
);
- assert_eq_m512h(r, e);
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_permutex2var_ph() {
- let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14);
- let r = _mm_permutex2var_ph(a, idx, b);
- let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtxps_ph() {
+ let a = _mm512_set_ps(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_maskz_cvtxps_ph(0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_permutex2var_ph() {
- let a = _mm256_setr_ph(
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtx_roundps_ph() {
+ let a = _mm512_set_ps(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let b = _mm256_setr_ph(
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ let r = _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let idx = _mm256_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
- let r = _mm256_permutex2var_ph(a, idx, b);
- let e = _mm256_setr_ph(
- 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
- 31.0,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtx_roundps_ph() {
+ let a = _mm512_set_ps(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm256_set_ph(
+ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
+ );
+ let r = _mm512_mask_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b0101010101010101,
+ a,
+ );
+ let e = _mm256_set_ph(
+ 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
+ 16.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_permutex2var_ph() {
- let a = _mm512_setr_ph(
+ unsafe fn test_mm512_maskz_cvtx_roundps_ph() {
+ let a = _mm512_set_ps(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
);
- let b = _mm512_setr_ph(
- 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0,
- 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
- 61.0, 62.0, 63.0, 64.0,
+ let r = _mm512_maskz_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
+ a,
);
- let idx = _mm512_set_epi16(
- 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20,
- 18, 16, 14, 12, 10, 8, 6, 4, 2, 0,
+ let e = _mm256_set_ph(
+ 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
);
- let r = _mm512_permutex2var_ph(a, idx, b);
- let e = _mm512_setr_ph(
- 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
- 31.0, 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0, 49.0, 51.0, 53.0, 55.0, 57.0,
- 59.0, 61.0, 63.0,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvtss_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+ let r = _mm_cvtss_sh(a, b);
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cvtss_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+ let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
+ let r = _mm_mask_cvtss_sh(src, 0, a, b);
+ let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_cvtss_sh(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_cvtss_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+ let r = _mm_maskz_cvtss_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_cvtss_sh(1, a, b);
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvt_roundss_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+ let r = _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cvt_roundss_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+ let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
+ let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_cvt_roundss_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+ let r =
+ _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_permutexvar_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7);
- let r = _mm_permutexvar_ph(idx, a);
- let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0);
+ unsafe fn test_mm_cvtpd_ph() {
+ let a = _mm_set_pd(1.0, 2.0);
+ let r = _mm_cvtpd_ph(a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_permutexvar_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let idx = _mm256_set_epi16(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
- let r = _mm256_permutexvar_ph(idx, a);
- let e = _mm256_setr_ph(
- 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0,
+ unsafe fn test_mm_mask_cvtpd_ph() {
+ let a = _mm_set_pd(1.0, 2.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_cvtpd_ph(src, 0b01, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvtpd_ph() {
+ let a = _mm_set_pd(1.0, 2.0);
+ let r = _mm_maskz_cvtpd_ph(0b01, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvtpd_ph() {
+ let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_cvtpd_ph(a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvtpd_ph() {
+ let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm256_mask_cvtpd_ph(src, 0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtpd_ph() {
+ let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_maskz_cvtpd_ph(0b0101, a);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtpd_ph() {
+ let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_cvtpd_ph(a);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtpd_ph() {
+ let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm512_mask_cvtpd_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtpd_ph() {
+ let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_maskz_cvtpd_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvt_roundpd_ph() {
+ let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvt_roundpd_ph() {
+ let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm512_mask_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0b01010101, a,
);
- assert_eq_m256h(r, e);
+ let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_permutexvar_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ unsafe fn test_mm512_maskz_cvt_roundpd_ph() {
+ let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101, a,
);
- let idx = _mm512_set_epi16(
- 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 1, 3, 5, 7, 9, 11, 13, 15,
- 17, 19, 21, 23, 25, 27, 29, 31,
+ let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvtsd_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_pd(1.0, 2.0);
+ let r = _mm_cvtsd_sh(a, b);
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cvtsd_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_pd(1.0, 2.0);
+ let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
+ let r = _mm_mask_cvtsd_sh(src, 0, a, b);
+ let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_cvtsd_sh(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_cvtsd_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_pd(1.0, 2.0);
+ let r = _mm_maskz_cvtsd_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_cvtsd_sh(1, a, b);
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvt_roundsd_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_pd(1.0, 2.0);
+ let r = _mm_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cvt_roundsd_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_pd(1.0, 2.0);
+ let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
+ let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let r = _mm512_permutexvar_ph(idx, a);
- let e = _mm512_setr_ph(
- 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
- 31.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0,
- 30.0, 32.0,
+ let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_cvt_roundsd_sh() {
+ let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let b = _mm_setr_pd(1.0, 2.0);
+ let r =
+ _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
+ assert_eq_m128h(r, e);
}
}
diff --git a/crates/core_arch/src/x86_64/avx512fp16.rs b/crates/core_arch/src/x86_64/avx512fp16.rs
new file mode 100644
index 0000000000..ebd85ed4ad
--- /dev/null
+++ b/crates/core_arch/src/x86_64/avx512fp16.rs
@@ -0,0 +1,129 @@
+use crate::core_arch::x86::*;
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsi2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
+ vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION)
+}
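+
+// A minimal usage sketch (assumes the `_mm_set1_ph`/`_mm_setr_ph` helpers from an
+// earlier part of this series): only the lowest f16 lane receives the converted
+// integer, the remaining seven lanes are carried over unchanged from `a`.
+//
+//     let a = _mm_set1_ph(1.0);
+//     let r = _mm_cvti64_sh(a, -2);
+//     // r == _mm_setr_ph(-2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0)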
+
+/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vcvtsi642sh(a, b, ROUNDING)
+}
+
+/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtusi2sh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
+ vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
+/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
+/// of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __m128h {
+ static_assert_rounding!(ROUNDING);
+ vcvtusi642sh(a, b, ROUNDING)
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.avx512fp16.vcvtsi642sh"]
+ fn vcvtsi642sh(a: __m128h, b: i64, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.vcvtusi642sh"]
+ fn vcvtusi642sh(a: __m128h, b: u64, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.vcvtsh2si64"]
+ fn vcvtsh2si64(a: __m128h, rounding: i32) -> i64;
+ #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi64"]
+ fn vcvtsh2usi64(a: __m128h, rounding: i32) -> u64;
+ #[link_name = "llvm.x86.avx512fp16.vcvttsh2si64"]
+ fn vcvttsh2si64(a: __m128h, sae: i32) -> i64;
+ #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi64"]
+ fn vcvttsh2usi64(a: __m128h, sae: i32) -> u64;
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::{x86::*, x86_64::*};
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvti64_sh() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvti64_sh(a, 10);
+ let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvt_roundi64_sh() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvt_roundi64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
+ let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvtu64_sh() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvtu64_sh(a, 10);
+ let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvt_roundu64_sh() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvt_roundu64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
+ let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+}
diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs
index 2e0139c5da..e4ad644edf 100644
--- a/crates/core_arch/src/x86_64/mod.rs
+++ b/crates/core_arch/src/x86_64/mod.rs
@@ -73,3 +73,7 @@ pub use self::adx::*;
mod bt;
#[stable(feature = "simd_x86_bittest", since = "1.55.0")]
pub use self::bt::*;
+
+mod avx512fp16;
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub use self::avx512fp16::*;
From 57641cc5088370828acd3a0315f7908047b63e81 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Wed, 17 Jul 2024 17:17:20 +0530
Subject: [PATCH 09/11] AVX512FP16 Part 8: Convert from f16
---
crates/core_arch/missing-x86.md | 201 -
crates/core_arch/src/x86/avx512fp16.rs | 16924 +++++++++++++-------
crates/core_arch/src/x86_64/avx512fp16.rs | 180 +
3 files changed, 11072 insertions(+), 6233 deletions(-)
diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index 1c2d0a6d7b..94ecc929ef 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -56,217 +56,16 @@
* [ ] [`_mm256_cvtsh_h`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h)
* [ ] [`_mm256_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pch)
* [ ] [`_mm512_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
- * [ ] [`_mm512_cvt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16)
- * [ ] [`_mm512_cvt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32)
- * [ ] [`_mm512_cvt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64)
- * [ ] [`_mm512_cvt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16)
- * [ ] [`_mm512_cvt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32)
- * [ ] [`_mm512_cvt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64)
- * [ ] [`_mm512_cvt_roundph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd)
- * [ ] [`_mm512_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16)
- * [ ] [`_mm512_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32)
- * [ ] [`_mm512_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64)
- * [ ] [`_mm512_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16)
- * [ ] [`_mm512_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32)
- * [ ] [`_mm512_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64)
- * [ ] [`_mm512_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd)
* [ ] [`_mm512_cvtsh_h`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsh_h)
- * [ ] [`_mm512_cvtt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16)
- * [ ] [`_mm512_cvtt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32)
- * [ ] [`_mm512_cvtt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64)
- * [ ] [`_mm512_cvtt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16)
- * [ ] [`_mm512_cvtt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32)
- * [ ] [`_mm512_cvtt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64)
- * [ ] [`_mm512_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16)
- * [ ] [`_mm512_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32)
- * [ ] [`_mm512_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64)
- * [ ] [`_mm512_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16)
- * [ ] [`_mm512_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32)
- * [ ] [`_mm512_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64)
- * [ ] [`_mm512_cvtx_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps)
- * [ ] [`_mm512_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
* [ ] [`_mm512_mask_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
- * [ ] [`_mm512_mask_cvt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16)
- * [ ] [`_mm512_mask_cvt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32)
- * [ ] [`_mm512_mask_cvt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64)
- * [ ] [`_mm512_mask_cvt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16)
- * [ ] [`_mm512_mask_cvt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32)
- * [ ] [`_mm512_mask_cvt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64)
- * [ ] [`_mm512_mask_cvt_roundph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd)
- * [ ] [`_mm512_mask_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16)
- * [ ] [`_mm512_mask_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32)
- * [ ] [`_mm512_mask_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64)
- * [ ] [`_mm512_mask_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16)
- * [ ] [`_mm512_mask_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32)
- * [ ] [`_mm512_mask_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64)
- * [ ] [`_mm512_mask_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd)
- * [ ] [`_mm512_mask_cvtt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16)
- * [ ] [`_mm512_mask_cvtt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32)
- * [ ] [`_mm512_mask_cvtt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64)
- * [ ] [`_mm512_mask_cvtt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16)
- * [ ] [`_mm512_mask_cvtt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32)
- * [ ] [`_mm512_mask_cvtt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64)
- * [ ] [`_mm512_mask_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16)
- * [ ] [`_mm512_mask_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32)
- * [ ] [`_mm512_mask_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64)
- * [ ] [`_mm512_mask_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16)
- * [ ] [`_mm512_mask_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32)
- * [ ] [`_mm512_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64)
- * [ ] [`_mm512_mask_cvtx_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps)
- * [ ] [`_mm512_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
- * [ ] [`_mm512_maskz_cvt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16)
- * [ ] [`_mm512_maskz_cvt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32)
- * [ ] [`_mm512_maskz_cvt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64)
- * [ ] [`_mm512_maskz_cvt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16)
- * [ ] [`_mm512_maskz_cvt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32)
- * [ ] [`_mm512_maskz_cvt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64)
- * [ ] [`_mm512_maskz_cvt_roundph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd)
- * [ ] [`_mm512_maskz_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16)
- * [ ] [`_mm512_maskz_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32)
- * [ ] [`_mm512_maskz_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64)
- * [ ] [`_mm512_maskz_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16)
- * [ ] [`_mm512_maskz_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32)
- * [ ] [`_mm512_maskz_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64)
- * [ ] [`_mm512_maskz_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd)
- * [ ] [`_mm512_maskz_cvtt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16)
- * [ ] [`_mm512_maskz_cvtt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32)
- * [ ] [`_mm512_maskz_cvtt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64)
- * [ ] [`_mm512_maskz_cvtt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16)
- * [ ] [`_mm512_maskz_cvtt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32)
- * [ ] [`_mm512_maskz_cvtt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64)
- * [ ] [`_mm512_maskz_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16)
- * [ ] [`_mm512_maskz_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32)
- * [ ] [`_mm512_maskz_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64)
- * [ ] [`_mm512_maskz_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16)
- * [ ] [`_mm512_maskz_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32)
- * [ ] [`_mm512_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64)
- * [ ] [`_mm512_maskz_cvtx_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps)
- * [ ] [`_mm512_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
* [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
- * [ ] [`_mm_cvt_roundsh_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32)
- * [ ] [`_mm_cvt_roundsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i64)
- * [ ] [`_mm_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd)
- * [ ] [`_mm_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss)
- * [ ] [`_mm_cvt_roundsh_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32)
- * [ ] [`_mm_cvt_roundsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u64)
* [ ] [`_mm_cvtsh_h`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h)
- * [ ] [`_mm_cvtsh_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32)
- * [ ] [`_mm_cvtsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i64)
- * [ ] [`_mm_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd)
- * [ ] [`_mm_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss)
- * [ ] [`_mm_cvtsh_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32)
- * [ ] [`_mm_cvtsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u64)
* [ ] [`_mm_cvtsi128_si16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16)
* [ ] [`_mm_cvtsi16_si128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128)
- * [ ] [`_mm_cvtt_roundsh_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32)
- * [ ] [`_mm_cvtt_roundsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i64)
- * [ ] [`_mm_cvtt_roundsh_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32)
- * [ ] [`_mm_cvtt_roundsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u64)
- * [ ] [`_mm_cvttsh_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32)
- * [ ] [`_mm_cvttsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i64)
- * [ ] [`_mm_cvttsh_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32)
- * [ ] [`_mm_cvttsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u64)
- * [ ] [`_mm_mask_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
- * [ ] [`_mm_mask_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
- * [ ] [`_mm_mask_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
- * [ ] [`_mm_mask_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
- * [ ] [`_mm_maskz_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
- * [ ] [`_mm_maskz_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
- * [ ] [`_mm_maskz_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
- * [ ] [`_mm_maskz_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
* [ ] [`_mm_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pch)
-["AVX512_FP16", "AVX512VL"]
-
- * [ ] [`_mm256_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16)
- * [ ] [`_mm256_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32)
- * [ ] [`_mm256_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64)
- * [ ] [`_mm256_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16)
- * [ ] [`_mm256_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu32)
- * [ ] [`_mm256_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu64)
- * [ ] [`_mm256_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_pd)
- * [ ] [`_mm256_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16)
- * [ ] [`_mm256_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi32)
- * [ ] [`_mm256_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi64)
- * [ ] [`_mm256_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16)
- * [ ] [`_mm256_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32)
- * [ ] [`_mm256_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64)
- * [ ] [`_mm256_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
- * [ ] [`_mm256_mask_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16)
- * [ ] [`_mm256_mask_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32)
- * [ ] [`_mm256_mask_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64)
- * [ ] [`_mm256_mask_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16)
- * [ ] [`_mm256_mask_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu32)
- * [ ] [`_mm256_mask_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu64)
- * [ ] [`_mm256_mask_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_pd)
- * [ ] [`_mm256_mask_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16)
- * [ ] [`_mm256_mask_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi32)
- * [ ] [`_mm256_mask_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi64)
- * [ ] [`_mm256_mask_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16)
- * [ ] [`_mm256_mask_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32)
- * [ ] [`_mm256_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64)
- * [ ] [`_mm256_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
- * [ ] [`_mm256_maskz_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16)
- * [ ] [`_mm256_maskz_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32)
- * [ ] [`_mm256_maskz_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64)
- * [ ] [`_mm256_maskz_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16)
- * [ ] [`_mm256_maskz_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu32)
- * [ ] [`_mm256_maskz_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu64)
- * [ ] [`_mm256_maskz_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_pd)
- * [ ] [`_mm256_maskz_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16)
- * [ ] [`_mm256_maskz_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi32)
- * [ ] [`_mm256_maskz_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi64)
- * [ ] [`_mm256_maskz_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16)
- * [ ] [`_mm256_maskz_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32)
- * [ ] [`_mm256_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
- * [ ] [`_mm256_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
- * [ ] [`_mm_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16)
- * [ ] [`_mm_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32)
- * [ ] [`_mm_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64)
- * [ ] [`_mm_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16)
- * [ ] [`_mm_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu32)
- * [ ] [`_mm_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu64)
- * [ ] [`_mm_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_pd)
- * [ ] [`_mm_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16)
- * [ ] [`_mm_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi32)
- * [ ] [`_mm_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi64)
- * [ ] [`_mm_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16)
- * [ ] [`_mm_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32)
- * [ ] [`_mm_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64)
- * [ ] [`_mm_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
- * [ ] [`_mm_mask_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16)
- * [ ] [`_mm_mask_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32)
- * [ ] [`_mm_mask_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64)
- * [ ] [`_mm_mask_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16)
- * [ ] [`_mm_mask_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu32)
- * [ ] [`_mm_mask_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu64)
- * [ ] [`_mm_mask_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_pd)
- * [ ] [`_mm_mask_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16)
- * [ ] [`_mm_mask_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi32)
- * [ ] [`_mm_mask_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi64)
- * [ ] [`_mm_mask_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16)
- * [ ] [`_mm_mask_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32)
- * [ ] [`_mm_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64)
- * [ ] [`_mm_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
- * [ ] [`_mm_maskz_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16)
- * [ ] [`_mm_maskz_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32)
- * [ ] [`_mm_maskz_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64)
- * [ ] [`_mm_maskz_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16)
- * [ ] [`_mm_maskz_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu32)
- * [ ] [`_mm_maskz_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu64)
- * [ ] [`_mm_maskz_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_pd)
- * [ ] [`_mm_maskz_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16)
- * [ ] [`_mm_maskz_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi32)
- * [ ] [`_mm_maskz_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi64)
- * [ ] [`_mm_maskz_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16)
- * [ ] [`_mm_maskz_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32)
- * [ ] [`_mm_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
- * [ ] [`_mm_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
-
-
-
["AVX512_VP2INTERSECT", "AVX512F"]
* [ ] [`_mm512_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi32)
diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs
index be99002e51..86d38feaec 100644
--- a/crates/core_arch/src/x86/avx512fp16.rs
+++ b/crates/core_arch/src/x86/avx512fp16.rs
@@ -13079,158 +13079,2952 @@ pub unsafe fn _mm_maskz_cvt_roundsd_sh(
_mm_mask_cvt_roundsd_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
}
-#[allow(improper_ctypes)]
-extern "C" {
- #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
- fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
- #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
- fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtph_epi16(a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epi16(_mm_undefined_si128(), 0xff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
- fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
- fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
- fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
- fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvtph2w_128(a, src.as_i16x8(), k))
+}
- #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
- fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
- fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
- fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
- fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epi16(_mm_setzero_si128(), k, a)
+}
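+
+// A minimal sketch of the writemask/zeromask semantics (assumes the `_mm_set1_ph`
+// helper from an earlier part of this series): with mask `0b01` only lane 0 is
+// converted; the masked variant keeps the corresponding `src` lanes, while the
+// zeroing variant clears them.
+//
+//     let a = _mm_set1_ph(3.0);
+//     let src = _mm_set1_epi16(7);
+//     let m = _mm_mask_cvtph_epi16(src, 0b01, a); // lanes: [3, 7, 7, 7, 7, 7, 7, 7]
+//     let z = _mm_maskz_cvtph_epi16(0b01, a);     // lanes: [3, 0, 0, 0, 0, 0, 0, 0]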
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
- fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
- fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
- fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
- fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtph_epi16(a: __m256h) -> __m256i {
+ _mm256_mask_cvtph_epi16(_mm256_undefined_si256(), 0xffff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
- fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
- fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
- fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
- fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
+ transmute(vcvtph2w_256(a, src.as_i16x16(), k))
+}
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
- fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
- fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
- fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
- fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
- fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
- fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
- fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
- fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i {
+ _mm256_mask_cvtph_epi16(_mm256_setzero_si256(), k, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
- fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
- fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
- fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
- fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
- fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
- -> __m512;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
- fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
- -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
- fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
- fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtph_epi16(a: __m512h) -> __m512i {
+ _mm512_mask_cvtph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
- fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.fma.f16"]
- fn fmaf16(a: f16, b: f16, c: f16) -> f16; // TODO: use `crate::intrinsics::fmaf16` when it's available
- #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
- fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
+ transmute(vcvtph2w_512(
+ a,
+ src.as_i16x32(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
- fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
- fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
- fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i {
+ _mm512_mask_cvtph_epi16(_mm512_setzero_si512(), k, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
- fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
- fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
- fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
- fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
- fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
- fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
- fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
- fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>(
+ src: __m512i,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+ transmute(vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING))
+}
- #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
- fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
- fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
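+
+// A minimal sketch of selecting the rounding mode at compile time through the
+// const generic; any of the `_MM_FROUND_*` combinations listed above is accepted.
+//
+//     let r = _mm512_cvt_roundph_epi16::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
+//     // truncates every f16 lane towards zero and suppresses exceptions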
- #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
- fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
- fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
- fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
- fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtph_epu16(a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epu16(_mm_undefined_si128(), 0xff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
- fn vminph_128(a: __m128h, b: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
- fn vminph_256(a: __m256h, b: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
- fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
- fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvtph2uw_128(a, src.as_u16x8(), k))
+}
- #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128"]
- fn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256"]
- fn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512"]
- fn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh"]
- fn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epu16(_mm_setzero_si128(), k, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128"]
- fn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256"]
- fn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512"]
- fn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh"]
- fn vgetmantsh(
- a: __m128h,
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtph_epu16(a: __m256h) -> __m256i {
+ _mm256_mask_cvtph_epu16(_mm256_undefined_si256(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
+ transmute(vcvtph2uw_256(a, src.as_u16x16(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i {
+ _mm256_mask_cvtph_epu16(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtph_epu16(a: __m512h) -> __m512i {
+ _mm512_mask_cvtph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
+ transmute(vcvtph2uw_512(
+ a,
+ src.as_u16x32(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i {
+ _mm512_mask_cvtph_epu16(_mm512_setzero_si512(), k, a)
+}
+
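Illustrative only (not part of the patch): a minimal sketch of the writemask/zeromask convention used by the conversions above. It assumes a nightly toolchain with the `f16` and `stdarch_x86_avx512_f16` features, that `_mm_set1_ph` is available from elsewhere in this series, and that the CPU actually supports the enabled target features.

use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
unsafe fn cvtph_epu16_mask_demo() {
    let a = _mm_set1_ph(3.0); // eight f16 lanes, all 3.0
    let src = _mm_set1_epi16(7); // fallback values for masked-off lanes

    // Writemask: lanes whose mask bit is clear keep the value from `src`.
    let masked = _mm_mask_cvtph_epu16(src, 0b0000_1111, a);
    assert_eq!(_mm_extract_epi16::<0>(masked), 3);
    assert_eq!(_mm_extract_epi16::<7>(masked), 7);

    // Zeromask: lanes whose mask bit is clear are zeroed instead.
    let zeroed = _mm_maskz_cvtph_epu16(0b0000_1111, a);
    assert_eq!(_mm_extract_epi16::<7>(zeroed), 0);
}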
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundph_epu16<const ROUNDING: i32>(a: __m512h) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+ _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundph_epu16<const ROUNDING: i32>(
+ src: __m512i,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+ transmute(vcvtph2uw_512(a, src.as_u16x32(), k, ROUNDING))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundph_epu16<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+ _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
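An illustrative sketch (not part of the patch) of how the explicit-rounding variant above is called: the rounding mode is a const generic, so it is supplied with turbofish syntax, and combined flags need a braced const expression. It assumes the same nightly features and hardware support as the intrinsics themselves.

use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16")]
unsafe fn round_down_to_epu16(a: __m512h) -> __m512i {
    // Round toward negative infinity and suppress floating-point exceptions.
    _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a)
}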
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttph_epi16(a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epi16(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvttph2w_128(a, src.as_i16x8(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epi16(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvttph_epi16(a: __m256h) -> __m256i {
+ _mm256_mask_cvttph_epi16(_mm256_undefined_si256(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
+ transmute(vcvttph2w_256(a, src.as_i16x16(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i {
+ _mm256_mask_cvttph_epi16(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvttph_epi16(a: __m512h) -> __m512i {
+ _mm512_mask_cvttph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
+ transmute(vcvttph2w_512(
+ a,
+ src.as_i16x32(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i {
+ _mm512_mask_cvttph_epi16(_mm512_setzero_si512(), k, a)
+}
+
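Illustrative only: the truncating forms drop the fractional part instead of rounding. A minimal sketch, assuming `_mm_set1_ph` from elsewhere in this series and the usual nightly features:

use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
unsafe fn cvttph_epi16_demo() {
    let a = _mm_set1_ph(2.75);
    let t = _mm_cvttph_epi16(a);
    // vcvttph2w truncates toward zero, so 2.75 becomes 2 in every lane.
    assert_eq!(_mm_extract_epi16::<0>(t), 2);
}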
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>(
+ src: __m512i,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_sae!(SAE);
+ transmute(vcvttph2w_512(a, src.as_i16x32(), k, SAE))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
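A short sketch (illustrative only) of the `cvtt_round` variants above: truncation fixes the rounding direction, so the only meaningful argument is exception suppression via `_MM_FROUND_NO_EXC` (or `_MM_FROUND_CUR_DIRECTION` to leave exceptions enabled).

use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16")]
unsafe fn truncate_no_exceptions(a: __m512h) -> __m512i {
    // Truncate to signed 16-bit integers while suppressing floating-point exceptions.
    _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a)
}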
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttph_epu16(a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epu16(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvttph2uw_128(a, src.as_u16x8(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epu16(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvttph_epu16(a: __m256h) -> __m256i {
+ _mm256_mask_cvttph_epu16(_mm256_undefined_si256(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
+ transmute(vcvttph2uw_256(a, src.as_u16x16(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i {
+ _mm256_mask_cvttph_epu16(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvttph_epu16(a: __m512h) -> __m512i {
+ _mm512_mask_cvttph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
+ transmute(vcvttph2uw_512(
+ a,
+ src.as_u16x32(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i {
+ _mm512_mask_cvttph_epu16(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>(
+ src: __m512i,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_sae!(SAE);
+ transmute(vcvttph2uw_512(a, src.as_u16x32(), k, SAE))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvtt_roundph_epu16::