From c9588c583c5e7f941cec800ae5e0dbb2b9c5ff15 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Sat, 13 Jul 2024 23:45:19 +0530
Subject: [PATCH] AVX512FP16 Part 5: FP-Support
`getexp`, `getmant`, `roundscale`, `scalef`, `reduce`
---
crates/core_arch/missing-x86.md | 90 -
crates/core_arch/src/x86/avx512fp16.rs | 10321 +++++++++++++++--------
2 files changed, 6749 insertions(+), 3662 deletions(-)
diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index c0b8aa1457..72fc8b840e 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -103,10 +103,6 @@
* [ ] [`_mm512_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
* [ ] [`_mm512_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
* [ ] [`_mm512_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask)
- * [ ] [`_mm512_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph)
- * [ ] [`_mm512_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph)
- * [ ] [`_mm512_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph)
- * [ ] [`_mm512_getmant_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph)
* [ ] [`_mm512_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
* [ ] [`_mm512_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
* [ ] [`_mm512_mask_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
@@ -155,16 +151,6 @@
* [ ] [`_mm512_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
* [ ] [`_mm512_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
* [ ] [`_mm512_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask)
- * [ ] [`_mm512_mask_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph)
- * [ ] [`_mm512_mask_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph)
- * [ ] [`_mm512_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph)
- * [ ] [`_mm512_mask_getmant_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph)
- * [ ] [`_mm512_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
- * [ ] [`_mm512_mask_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
- * [ ] [`_mm512_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph)
- * [ ] [`_mm512_mask_roundscale_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph)
- * [ ] [`_mm512_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph)
- * [ ] [`_mm512_mask_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
* [ ] [`_mm512_maskz_cvt_roundepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
* [ ] [`_mm512_maskz_cvt_roundepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
* [ ] [`_mm512_maskz_cvt_roundepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
@@ -209,28 +195,12 @@
* [ ] [`_mm512_maskz_cvtx_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
* [ ] [`_mm512_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
* [ ] [`_mm512_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
- * [ ] [`_mm512_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph)
- * [ ] [`_mm512_maskz_getexp_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph)
- * [ ] [`_mm512_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph)
- * [ ] [`_mm512_maskz_getmant_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph)
- * [ ] [`_mm512_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
- * [ ] [`_mm512_maskz_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
- * [ ] [`_mm512_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph)
- * [ ] [`_mm512_maskz_roundscale_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph)
- * [ ] [`_mm512_maskz_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph)
- * [ ] [`_mm512_maskz_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph)
* [ ] [`_mm512_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
* [ ] [`_mm512_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
* [ ] [`_mm512_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph)
* [ ] [`_mm512_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph)
* [ ] [`_mm512_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph)
* [ ] [`_mm512_reduce_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph)
- * [ ] [`_mm512_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph)
- * [ ] [`_mm512_reduce_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph)
- * [ ] [`_mm512_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph)
- * [ ] [`_mm512_roundscale_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph)
- * [ ] [`_mm512_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph)
- * [ ] [`_mm512_scalef_round_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph)
* [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
* [ ] [`_mm_cvt_roundi32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
* [ ] [`_mm_cvt_roundi64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
@@ -268,10 +238,6 @@
* [ ] [`_mm_cvtu32_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
* [ ] [`_mm_cvtu64_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh)
* [ ] [`_mm_fpclass_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask)
- * [ ] [`_mm_getexp_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh)
- * [ ] [`_mm_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh)
- * [ ] [`_mm_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh)
- * [ ] [`_mm_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh)
* [ ] [`_mm_mask_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
* [ ] [`_mm_mask_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
* [ ] [`_mm_mask_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
@@ -281,16 +247,6 @@
* [ ] [`_mm_mask_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
* [ ] [`_mm_mask_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
* [ ] [`_mm_mask_fpclass_sh_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask)
- * [ ] [`_mm_mask_getexp_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh)
- * [ ] [`_mm_mask_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
- * [ ] [`_mm_mask_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
- * [ ] [`_mm_mask_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
- * [ ] [`_mm_mask_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
- * [ ] [`_mm_mask_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
- * [ ] [`_mm_mask_roundscale_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh)
- * [ ] [`_mm_mask_roundscale_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh)
- * [ ] [`_mm_mask_scalef_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh)
- * [ ] [`_mm_mask_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
* [ ] [`_mm_maskz_cvt_roundsd_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
* [ ] [`_mm_maskz_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
* [ ] [`_mm_maskz_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
@@ -299,22 +255,6 @@
* [ ] [`_mm_maskz_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
* [ ] [`_mm_maskz_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
* [ ] [`_mm_maskz_cvtss_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
- * [ ] [`_mm_maskz_getexp_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh)
- * [ ] [`_mm_maskz_getexp_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
- * [ ] [`_mm_maskz_getmant_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
- * [ ] [`_mm_maskz_getmant_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
- * [ ] [`_mm_maskz_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
- * [ ] [`_mm_maskz_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
- * [ ] [`_mm_maskz_roundscale_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh)
- * [ ] [`_mm_maskz_roundscale_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh)
- * [ ] [`_mm_maskz_scalef_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh)
- * [ ] [`_mm_maskz_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
- * [ ] [`_mm_reduce_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
- * [ ] [`_mm_reduce_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
- * [ ] [`_mm_roundscale_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh)
- * [ ] [`_mm_roundscale_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh)
- * [ ] [`_mm_scalef_round_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
- * [ ] [`_mm_scalef_sh`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
* [ ] [`_mm_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pch)
@@ -345,8 +285,6 @@
* [ ] [`_mm256_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
* [ ] [`_mm256_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
* [ ] [`_mm256_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
- * [ ] [`_mm256_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph)
- * [ ] [`_mm256_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph)
* [ ] [`_mm256_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
* [ ] [`_mm256_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
* [ ] [`_mm256_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
@@ -372,11 +310,6 @@
* [ ] [`_mm256_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
* [ ] [`_mm256_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
* [ ] [`_mm256_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
- * [ ] [`_mm256_mask_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph)
- * [ ] [`_mm256_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
- * [ ] [`_mm256_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
- * [ ] [`_mm256_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
- * [ ] [`_mm256_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
* [ ] [`_mm256_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
* [ ] [`_mm256_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
* [ ] [`_mm256_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
@@ -399,20 +332,12 @@
* [ ] [`_mm256_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
* [ ] [`_mm256_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
* [ ] [`_mm256_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
- * [ ] [`_mm256_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
- * [ ] [`_mm256_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
- * [ ] [`_mm256_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
- * [ ] [`_mm256_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
- * [ ] [`_mm256_maskz_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph)
* [ ] [`_mm256_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
* [ ] [`_mm256_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
* [ ] [`_mm256_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph)
* [ ] [`_mm256_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph)
* [ ] [`_mm256_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph)
* [ ] [`_mm256_reduce_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph)
- * [ ] [`_mm256_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph)
- * [ ] [`_mm256_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph)
- * [ ] [`_mm256_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
* [ ] [`_mm_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
* [ ] [`_mm_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
* [ ] [`_mm_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
@@ -437,8 +362,6 @@
* [ ] [`_mm_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
* [ ] [`_mm_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
* [ ] [`_mm_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
- * [ ] [`_mm_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph)
- * [ ] [`_mm_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph)
* [ ] [`_mm_mask_blend_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
* [ ] [`_mm_mask_cmp_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
* [ ] [`_mm_mask_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
@@ -464,11 +387,6 @@
* [ ] [`_mm_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
* [ ] [`_mm_mask_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
* [ ] [`_mm_mask_fpclass_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
- * [ ] [`_mm_mask_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph)
- * [ ] [`_mm_mask_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph)
- * [ ] [`_mm_mask_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
- * [ ] [`_mm_mask_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
- * [ ] [`_mm_mask_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
* [ ] [`_mm_maskz_cvtepi16_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
* [ ] [`_mm_maskz_cvtepi32_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
* [ ] [`_mm_maskz_cvtepi64_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
@@ -491,20 +409,12 @@
* [ ] [`_mm_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
* [ ] [`_mm_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
* [ ] [`_mm_maskz_cvtxps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
- * [ ] [`_mm_maskz_getexp_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
- * [ ] [`_mm_maskz_getmant_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph)
- * [ ] [`_mm_maskz_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
- * [ ] [`_mm_maskz_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
- * [ ] [`_mm_maskz_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph)
* [ ] [`_mm_permutex2var_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
* [ ] [`_mm_permutexvar_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
* [ ] [`_mm_reduce_add_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph)
* [ ] [`_mm_reduce_max_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph)
* [ ] [`_mm_reduce_min_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph)
* [ ] [`_mm_reduce_mul_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph)
- * [ ] [`_mm_reduce_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph)
- * [ ] [`_mm_roundscale_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph)
- * [ ] [`_mm_scalef_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph)
diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs
index b30bc63ed4..ff4a06dde4 100644
--- a/crates/core_arch/src/x86/avx512fp16.rs
+++ b/crates/core_arch/src/x86/avx512fp16.rs
@@ -8440,6033 +8440,9210 @@ pub unsafe fn _mm_maskz_min_round_sh<const SAE: i32>(
_mm_mask_min_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
}
-#[allow(improper_ctypes)]
-extern "C" {
- #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
- fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
- #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
- fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
-
- #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
- fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
- fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
- fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
- fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
+/// This intrinsic essentially calculates `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getexp_ph(a: __m128h) -> __m128h {
+ _mm_mask_getexp_ph(_mm_undefined_ph(), 0xff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
- fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
- fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
- fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
- fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
+/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
+ vgetexpph_128(a, src, k)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
- fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
- fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
- fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
- fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
+/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h {
+ _mm_mask_getexp_ph(_mm_setzero_ph(), k, a)
+}
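
A minimal usage sketch of the packed `getexp` family (not part of the patch; `getexp_demo` is a hypothetical helper, and the snippet assumes a nightly toolchain with the `stdarch_x86_avx512_f16` and `f16` features plus a CPU supporting AVX512FP16 and AVX512VL):

```rust
#![feature(stdarch_x86_avx512_f16, f16)]
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
unsafe fn getexp_demo() -> (__m128h, __m128h) {
    // getexp computes floor(log2(|x|)) per lane: 8.0 = 1.0 * 2^3, so 8.0 -> 3.0.
    let a = _mm_set1_ph(8.0);
    let exps = _mm_getexp_ph(a); // every lane holds 3.0
    // Writemask variant: lanes whose mask bit is clear keep the value from `src`.
    let src = _mm_set1_ph(-1.0);
    let masked = _mm_mask_getexp_ph(src, 0b0000_1111, a); // low 4 lanes 3.0, rest -1.0
    (exps, masked)
}
```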
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
- fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
- fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
- fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
- fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
+/// This intrinsic essentially calculates `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_getexp_ph(a: __m256h) -> __m256h {
+ _mm256_mask_getexp_ph(_mm256_undefined_ph(), 0xffff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
- fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
- fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
- fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
- fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
- fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
- fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
- fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
- fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
+/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
+ vgetexpph_256(a, src, k)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
- fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
- fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
- fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
- fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
- fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
- -> __m512;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
- fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
- -> __m512;
- #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
- fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
- #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
- fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
+/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h {
+ _mm256_mask_getexp_ph(_mm256_setzero_ph(), k, a)
+}
- #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
- fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.fma.f16"]
- fn fmaf16(a: f16, b: f16, c: f16) -> f16; // TODO: use `crate::intrinsics::fmaf16` when it's available
- #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
- fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
+/// This intrinsic essentially calculates `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_getexp_ph(a: __m512h) -> __m512h {
+ _mm512_mask_getexp_ph(_mm512_undefined_ph(), 0xffffffff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
- fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
- fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
- fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
+/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
+ _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
- fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
- fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
- fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
- fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
+/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h {
+ _mm512_mask_getexp_ph(_mm512_setzero_ph(), k, a)
+}
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
- fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
- fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
- fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
- fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
+/// This intrinsic essentially calculates `floor(log2(x))` for each element. Exceptions can be suppressed
+/// by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h {
+ _mm512_mask_getexp_round_ph::<SAE>(_mm512_undefined_ph(), 0xffffffff, a)
+}
- #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
- fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
- fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
+/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_getexp_round_ph<const SAE: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ vgetexpph_512(a, src, k, SAE)
+}
- #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
- fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
- fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
- fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
- fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
+/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
+/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512h {
+ _mm512_mask_getexp_round_ph::<SAE>(_mm512_setzero_ph(), k, a)
+}
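
For the `*_round` forms a sketch like the following applies; the `SAE` const parameter only selects exception suppression here, since `getexp` has no rounding step (`getexp_no_exc` is a hypothetical wrapper, same feature assumptions as above):

```rust
#[target_feature(enable = "avx512fp16")]
unsafe fn getexp_no_exc(v: __m512h) -> __m512h {
    // _MM_FROUND_NO_EXC suppresses floating-point exceptions; passing
    // _MM_FROUND_CUR_DIRECTION instead behaves like _mm512_getexp_ph.
    _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(v)
}
```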
- #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
- fn vminph_128(a: __m128h, b: __m128h) -> __m128h;
- #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
- fn vminph_256(a: __m256h, b: __m256h) -> __m256h;
- #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
- fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
- #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
- fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
+/// calculates `floor(log2(x))` for the lower element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_getexp_sh(_mm_undefined_ph(), 0xff, a, b)
+}
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
+/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
+/// for the lower element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
-#[cfg(test)]
-mod tests {
- use crate::core_arch::x86::*;
- use crate::mem::transmute;
- use crate::ptr::{addr_of, addr_of_mut};
- use stdarch_test::simd_test;
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
+/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
+/// lower element.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_getexp_sh(_mm_setzero_ph(), k, a, b)
+}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
- _mm_setr_ph(re, im, re, im, re, im, re, im)
- }
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
+/// calculates `floor(log2(x))` for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
+/// in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_getexp_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
+}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
- _mm256_setr_ph(
- re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
- )
- }
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
+/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
+/// for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getexp_round_sh<const SAE: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ vgetexpsh(a, b, src, k, SAE)
+}
- #[target_feature(enable = "avx512fp16")]
- unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
- _mm512_setr_ph(
- re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
- re, im, re, im, re, im, re, im, re, im,
- )
- }
+/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
+/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
+/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
+/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
+/// lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getexp_round_sh<const SAE: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ _mm_mask_getexp_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
+}
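
A sketch of the scalar merge semantics (hypothetical `getexp_lane0` helper, same feature assumptions as above):

```rust
#[target_feature(enable = "avx512fp16")]
unsafe fn getexp_lane0(a: __m128h, b: __m128h) -> __m128h {
    // dst[0] = floor(log2(|b[0]|)); dst[1..8] are copied from `a`.
    _mm_getexp_sh(a, b)
}
```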
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_ph() {
- let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ a: __m128h,
+) -> __m128h {
+ _mm_mask_getmant_ph::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+) -> __m128h {
+ vgetmantph_128(a, (SIGN << 2) | NORM, src, k)
+}
+
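For reference, the `(SIGN << 2) | NORM` expression above assembles the instruction immediate; a small constant sketch using Intel's enum encoding (SIGN in bits 3:2, NORM in bits 1:0 — values assumed from the Intel intrinsics guide):

```rust
const NORM: i32 = 2; // _MM_MANT_NORM_p5_1
const SIGN: i32 = 2; // _MM_MANT_SIGN_nan
const IMM: i32 = (SIGN << 2) | NORM; // 0b1010
```
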
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ k: __mmask8,
+ a: __m128h,
+) -> __m128h {
+ _mm_mask_getmant_ph::<NORM, SIGN>(_mm_setzero_ph(), k, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ a: __m256h,
+) -> __m256h {
+ _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_undefined_ph(), 0xffff, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ src: __m256h,
+ k: __mmask16,
+ a: __m256h,
+) -> __m256h {
+ vgetmantph_256(a, (SIGN << 2) | NORM, src, k)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ k: __mmask16,
+ a: __m256h,
+) -> __m256h {
+ _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_setzero_ph(), k, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ a: __m512h,
+) -> __m512h {
+ _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+    _mm512_mask_getmant_round_ph::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_getmant_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+    _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_setzero_ph(), k, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
+/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
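+/// A hedged sketch of the SAE form (illustrative only; `_MM_FROUND_NO_EXC` is the existing
+/// rounding-control constant from this crate):
+///
+/// ```ignore
+/// // Same normalization as `_mm512_getmant_ph`, but with floating-point
+/// // exceptions suppressed at compile time via the SAE parameter.
+/// let mant = _mm512_getmant_round_ph::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(a);
+/// ```
+///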
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(1, 2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_getmant_round_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ a: __m512h,
+) -> __m512h {
+    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
+/// in the sae parameter
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4, 5)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_getmant_round_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE)
+}
+
+/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
+/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
+/// in the sae parameter
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_getmant_round_ph<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_setzero_ph(), k, a)
+}
+
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
+/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
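+/// A lower-lane sketch (illustrative only):
+///
+/// ```ignore
+/// // Only element 0 of b is normalized; elements 1..=7 of dst are copied from a.
+/// let r = _mm_getmant_sh::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
+/// ```
+///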
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getmant_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+    _mm_mask_getmant_sh::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
+/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
+/// the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(4, 5)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getmant_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+    _mm_mask_getmant_round_sh::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
+/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
+/// the source sign.
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getmant_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+    _mm_mask_getmant_sh::<NORM, SIGN>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
+/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
+/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
+/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_getmant_round_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
+/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
+/// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(4, 5, 6)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_getmant_round_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ vgetmantsh(a, b, (SIGN << 2) | NORM, src, k, SAE)
+}
+
+/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
+/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
+/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
+/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
+/// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// The mantissa is normalized to the interval specified by interv, which can take the following values:
+///
+/// _MM_MANT_NORM_1_2 // interval [1, 2)
+/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
+/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
+/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
+///
+/// The sign is determined by sc which can take the following values:
+///
+/// _MM_MANT_SIGN_src // sign = sign(src)
+/// _MM_MANT_SIGN_zero // sign = 0
+/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4, 5)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_getmant_round_sh<
+ const NORM: _MM_MANTISSA_NORM_ENUM,
+ const SIGN: _MM_MANTISSA_SIGN_ENUM,
+ const SAE: i32,
+>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
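+/// Bits 7:4 of imm8 select how many fraction bits to keep; a sketch (illustrative only):
+///
+/// ```ignore
+/// // Keep one fraction bit (imm8[7:4] = 1) and round to nearest, i.e. round
+/// // every element to the nearest multiple of 0.5.
+/// let r = _mm_roundscale_ph::<{ (1 << 4) | _MM_FROUND_TO_NEAREST_INT }>(a);
+/// ```
+///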
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h {
+    _mm_mask_roundscale_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_roundscale_ph<const IMM8: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+) -> __m128h {
+ vrndscaleph_128(a, IMM8, src, k)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
+    _mm_mask_roundscale_ph::<IMM8>(_mm_setzero_ph(), k, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h {
+    _mm256_mask_roundscale_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_roundscale_ph<const IMM8: i32>(
+ src: __m256h,
+ k: __mmask16,
+ a: __m256h,
+) -> __m256h {
+ vrndscaleph_256(a, IMM8, src, k)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
+    _mm256_mask_roundscale_ph::<IMM8>(_mm256_setzero_ph(), k, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h {
+    _mm512_mask_roundscale_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_roundscale_ph<const IMM8: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+    _mm512_mask_roundscale_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
+    _mm512_mask_roundscale_ph::<IMM8>(_mm512_setzero_ph(), k, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
+/// in the sae parameter
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
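+/// A hedged sketch combining imm8 with SAE (illustrative only):
+///
+/// ```ignore
+/// // Truncate every element to a whole number (imm8[7:4] = 0) without
+/// // raising floating-point exceptions.
+/// let r = _mm512_roundscale_round_ph::<_MM_FROUND_TO_ZERO, _MM_FROUND_NO_EXC>(a);
+/// ```
+///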
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
+    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
+/// the corresponding mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
+/// in the sae parameter
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ vrndscaleph_512(a, IMM8, src, k, SAE)
+}
+
+/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
+/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
+/// from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
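+/// A lower-lane sketch (illustrative only):
+///
+/// ```ignore
+/// // Element 0 of b is rounded to an integer; elements 1..=7 of dst come from a.
+/// let r = _mm_roundscale_sh::<_MM_FROUND_TO_NEAREST_INT>(a, b);
+/// ```
+///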
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
+    _mm_mask_roundscale_sh::<IMM8>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
+/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_roundscale_sh<const IMM8: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+    _mm_mask_roundscale_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
+/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_roundscale_sh<const IMM8: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+    _mm_mask_roundscale_sh::<IMM8>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
+/// from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+    _mm_mask_roundscale_round_sh::<IMM8, SAE>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
+/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(4, 5)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ vrndscalesh(a, b, src, k, IMM8, SAE)
+}
+
+/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
+/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
+/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+    _mm_mask_roundscale_round_sh::<IMM8, SAE>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst.
+///
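+/// Each element is computed as `a[i] * 2^floor(b[i])`; a sketch (illustrative only,
+/// assuming `_mm_set1_ph` from an earlier part of this series):
+///
+/// ```ignore
+/// let a = _mm_set1_ph(3.0);
+/// let b = _mm_set1_ph(2.0);
+/// let r = _mm_scalef_ph(a, b); // every element becomes 3.0 * 2^2 = 12.0
+/// ```
+///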
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_scalef_ph(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ vscalefph_128(a, b, src, k)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_scalef_ph(_mm_setzero_ph(), k, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h {
+ _mm256_mask_scalef_ph(_mm256_undefined_ph(), 0xffff, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ vscalefph_256(a, b, src, k)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+ _mm256_mask_scalef_ph(_mm256_setzero_ph(), k, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_scalef_ph(_mm512_undefined_ph(), 0xffffffff, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+ _mm512_mask_scalef_ph(_mm512_setzero_ph(), k, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
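+/// A hedged sketch of the compile-time rounding selection (illustrative only; the combined
+/// value 0x00 | 0x08 = 8 matches the ROUNDING = 8 in this function's assert_instr):
+///
+/// ```ignore
+/// let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+/// ```
+///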
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
+    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_undefined_ph(), 0xffffffff, a, b)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+) -> __m512h {
+ vscalefph_512(a, b, src, k, ROUNDING)
+}
+
+/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+ b: __m512h,
+) -> __m512h {
+    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
+}
+
+/// Scale the lower half-precision (16-bit) floating-point element in a using the value from the
+/// lower element of b, store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
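+/// A lower-lane sketch (illustrative only):
+///
+/// ```ignore
+/// // Only element 0 is scaled: a[0] * 2^floor(b[0]); elements 1..=7 come from a.
+/// let r = _mm_scalef_sh(a, b);
+/// ```
+///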
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_scalef_sh(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Scale the lower half-precision (16-bit) floating-point element in a using the value from the
+/// lower element of b, store the result in the lower element of dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to
+/// the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Scale the lower half-precision (16-bit) floating-point element in a using the value from the
+/// lower element of b, store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_scalef_sh(_mm_setzero_ph(), k, a, b)
+}
+
+/// Scale the lower half-precision (16-bit) floating-point element in a using the value from the
+/// lower element of b, store the result in the lower element of dst, and copy the upper 7 packed
+/// elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
+    _mm_mask_scalef_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Scale the lower half-precision (16-bit) floating-point element in a using the value from the
+/// lower element of b, store the result in the lower element of dst using writemask k (the element
+/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to
+/// the upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_scalef_round_sh<const ROUNDING: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ vscalefsh(a, b, src, k, ROUNDING)
+}
+
+/// Scale the lower half-precision (16-bit) floating-point element in a using the value from the
+/// lower element of b, store the result in the lower element of dst using zeromask k (the element
+/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
+/// upper elements of dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+    _mm_mask_scalef_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
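+/// The reduced argument is `a - roundscale(a)` per element, so with imm8 = 0 this extracts
+/// what is left after rounding to an integer; a sketch (illustrative only):
+///
+/// ```ignore
+/// let a = _mm_set1_ph(1.25);
+/// // 1.25 rounds to 1.0, leaving the reduced argument 0.25 in every element.
+/// let r = _mm_reduce_ph::<_MM_FROUND_TO_NEAREST_INT>(a);
+/// ```
+///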
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h {
+    _mm_mask_reduce_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_reduce_ph<const IMM8: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+) -> __m128h {
+ vreduceph_128(a, IMM8, src, k)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
+    _mm_mask_reduce_ph::<IMM8>(_mm_setzero_ph(), k, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h {
+    _mm256_mask_reduce_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_reduce_ph<const IMM8: i32>(
+ src: __m256h,
+ k: __mmask16,
+ a: __m256h,
+) -> __m256h {
+ vreduceph_256(a, IMM8, src, k)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
+ _mm256_mask_reduce_ph::<IMM8>(_mm256_setzero_ph(), k, a)
+}
+
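+// Masking sketch (given some `src` and `a` of type __m256h): for the same
+// mask, the `mask_` form keeps `src` lanes where the bit is clear, while the
+// `maskz_` form zeroes them:
+//
+//     let kept = _mm256_mask_reduce_ph::<0>(src, 0b0000_0000_0000_0011, a);
+//     let zeroed = _mm256_maskz_reduce_ph::<0>(0b0000_0000_0000_0011, a);
+//     // lanes 0 and 1 hold reduced values in both results; lanes 2..=15
+//     // come from `src` in `kept` and are 0.0 in `zeroed`
+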
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h {
+ _mm512_mask_reduce_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_reduce_ph<const IMM8: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ _mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
+ _mm512_mask_reduce_ph::<IMM8>(_mm512_setzero_ph(), k, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(1, 2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
+ _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>(
+ src: __m512h,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ vreduceph_512(a, IMM8, src, k, SAE)
+}
+
+/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
+/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
+/// out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512h {
+ _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
+}
+
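+// SAE sketch (given some `a`: __m512h): the reduction itself is still
+// controlled by IMM8; the separate SAE parameter only decides whether
+// floating-point exceptions are reported, so _MM_FROUND_NO_EXC runs silently:
+//
+//     let r = _mm512_reduce_round_ph::<_MM_FROUND_TO_ZERO, _MM_FROUND_NO_EXC>(a);
+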
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the
+/// upper 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
+ _mm_mask_reduce_sh::<IMM8>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
+/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from
+/// a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_reduce_sh<const IMM8: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ _mm_mask_reduce_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
+/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
+/// to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_reduce_sh<const IMM8: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ _mm_mask_reduce_sh::<IMM8>(_mm_setzero_ph(), k, a, b)
+}
+
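+// Scalar sketch (illustrative values): only the low lane of `b` is reduced;
+// the upper seven lanes of the result always come from `a`:
+//
+//     let a = _mm_setr_ph(9.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+//     let b = _mm_set_sh(2.5);
+//     // r = [0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]
+//     let r = _mm_reduce_sh::<_MM_FROUND_TO_ZERO>(a, b);
+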
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the upper
+/// 7 packed elements from a to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(2, 3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ _mm_mask_reduce_round_sh::<IMM8, SAE>(_mm_undefined_ph(), 0xff, a, b)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
+/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a
+/// to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(4, 5)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>(
+ src: __m128h,
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ vreducesh(a, b, src, k, IMM8, SAE)
+}
+
+/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
+/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
+/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
+/// to the upper elements of dst.
+///
+/// Rounding is done according to the imm8 parameter, which can be one of:
+///
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest
+/// _MM_FROUND_TO_NEG_INF // round down
+/// _MM_FROUND_TO_POS_INF // round up
+/// _MM_FROUND_TO_ZERO // truncate
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
+#[rustc_legacy_const_generics(3, 4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>(
+ k: __mmask8,
+ a: __m128h,
+ b: __m128h,
+) -> __m128h {
+ _mm_mask_reduce_round_sh::<IMM8, SAE>(_mm_setzero_ph(), k, a, b)
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
+ fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
+ #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
+ fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
+
+ #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
+ fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
+ fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
+ fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
+ fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
+ fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
+ fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
+ fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
+ fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
+ fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
+ fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
+ fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
+ fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
+ fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
+ fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
+ fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
+ fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
+ fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
+ fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
+ fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
+ fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
+ fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
+ fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
+ fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
+ fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
+ fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
+ fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
+ fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
+ fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
+ fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
+ -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
+ fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
+ -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
+ fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
+ fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
+ fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.fma.f16"]
+ fn fmaf16(a: f16, b: f16, c: f16) -> f16; // TODO: use `crate::intrinsics::fmaf16` when it's available
+ #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
+ fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
+
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
+ fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
+ fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
+ fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
+ fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
+ fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
+ fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
+ fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
+ fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
+ fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
+ fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
+ fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
+ fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
+ fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
+ fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
+ fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
+ fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
+ fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
+ fn vminph_128(a: __m128h, b: __m128h) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
+ fn vminph_256(a: __m256h, b: __m256h) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
+ fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
+ fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128"]
+ fn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256"]
+ fn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512"]
+ fn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh"]
+ fn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128"]
+ fn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256"]
+ fn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512"]
+ fn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh"]
+ fn vgetmantsh(
+ a: __m128h,
+ b: __m128h,
+ imm8: i32,
+ src: __m128h,
+ k: __mmask8,
+ sae: i32,
+ ) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128"]
+ fn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256"]
+ fn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512"]
+ fn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh"]
+ fn vrndscalesh(
+ a: __m128h,
+ b: __m128h,
+ src: __m128h,
+ k: __mmask8,
+ imm8: i32,
+ sae: i32,
+ ) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128"]
+ fn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256"]
+ fn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512"]
+ fn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh"]
+ fn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128"]
+ fn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256"]
+ fn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512"]
+ fn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
+ #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh"]
+ fn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
+ -> __m128h;
+}
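+
+// The `mask.*` builtins above apply the write-/zero-mask inside the
+// instruction itself, so the wrappers can pass `src` and `k` straight
+// through without emitting a separate blend.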
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::x86::*;
+ use crate::mem::transmute;
+ use crate::ptr::{addr_of, addr_of_mut};
+ use stdarch_test::simd_test;
+
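+ // The `*_set1_pch` helpers below broadcast a single complex number as
+ // interleaved (re, im) f16 pairs, the element layout consumed by the
+ // vfmulc/vfcmadd complex-arithmetic builtins declared above.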
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
+ _mm_setr_ph(re, im, re, im, re, im, re, im)
+ }
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
+ _mm256_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ )
+ }
+
+ #[target_feature(enable = "avx512fp16")]
+ unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
+ _mm512_setr_ph(
+ re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
+ re, im, re, im, re, im, re, im, re, im,
+ )
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_ph() {
+ let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set_ph() {
+ let r = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_setr_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set_ph() {
+ let r = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_setr_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set_sh() {
+ let r = _mm_set_sh(1.0);
+ let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_set1_ph() {
+ let r = _mm_set1_ph(1.0);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_set1_ph() {
+ let r = _mm256_set1_ph(1.0);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_set1_ph() {
+ let r = _mm512_set1_ph(1.0);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_setr_ph() {
+ let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_setr_ph() {
+ let r = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let e = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setr_ph() {
+ let r = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let e = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_setzero_ph() {
+ let r = _mm_setzero_ph();
+ let e = _mm_set1_ph(0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_setzero_ph() {
+ let r = _mm256_setzero_ph();
+ let e = _mm256_set1_ph(0.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_setzero_ph() {
+ let r = _mm512_setzero_ph();
+ let e = _mm512_set1_ph(0.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castsi128_ph() {
+ let a = _mm_set1_epi16(0x3c00);
+ let r = _mm_castsi128_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castsi256_ph() {
+ let a = _mm256_set1_epi16(0x3c00);
+ let r = _mm256_castsi256_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castsi512_ph() {
+ let a = _mm512_set1_epi16(0x3c00);
+ let r = _mm512_castsi512_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_si128() {
+ let a = _mm_set1_ph(1.0);
+ let r = _mm_castph_si128(a);
+ let e = _mm_set1_epi16(0x3c00);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_si256() {
+ let a = _mm256_set1_ph(1.0);
+ let r = _mm256_castph_si256(a);
+ let e = _mm256_set1_epi16(0x3c00);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_si512() {
+ let a = _mm512_set1_ph(1.0);
+ let r = _mm512_castph_si512(a);
+ let e = _mm512_set1_epi16(0x3c00);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castps_ph() {
+ let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
+ let r = _mm_castps_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castps_ph() {
+ let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castps_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castps_ph() {
+ let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castps_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_ps() {
+ let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
+ let r = _mm_castph_ps(a);
+ let e = _mm_set1_ps(1.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_ps() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
+ let r = _mm256_castph_ps(a);
+ let e = _mm256_set1_ps(1.0);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_ps() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
+ let r = _mm512_castph_ps(a);
+ let e = _mm512_set1_ps(1.0);
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castpd_ph() {
+ let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
+ let r = _mm_castpd_ph(a);
+ let e = _mm_set1_ph(1.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castpd_ph() {
+ let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
+ let r = _mm256_castpd_ph(a);
+ let e = _mm256_set1_ph(1.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castpd_ph() {
+ let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
+ let r = _mm512_castpd_ph(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_castph_pd() {
+ let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
+ let r = _mm_castph_pd(a);
+ let e = _mm_set1_pd(1.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph_pd() {
+ let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
+ let r = _mm256_castph_pd(a);
+ let e = _mm256_set1_pd(1.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph_pd() {
+ let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
+ let r = _mm512_castph_pd(a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph256_ph128() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm256_castph256_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph128() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph128(a);
+ let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph512_ph256() {
+ let a = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
+ 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ );
+ let r = _mm512_castph512_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_castph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_castph128_ph256(a);
+ assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_castph128_ph512(a);
+ assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_castph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_castph256_ph512(a);
+ assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm256_zextph128_ph256() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm256_zextph128_ph256(a);
+ let e = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph128_ph512() {
+ let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_zextph128_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_zextph256_ph512() {
+ let a = _mm256_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_zextph256_ph512(a);
+ let e = _mm512_setr_ph(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cmp_round_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cmp_round_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
+ assert_eq!(r, 0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cmp_sh_mask() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
+ assert_eq!(r, 0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comi_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comige_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_comigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comile_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comilt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_comineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_comineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomieq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomieq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomige_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomige_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomigt_sh() {
+ let a = _mm_set_sh(2.0);
+ let b = _mm_set_sh(1.0);
+ let r = _mm_ucomigt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomile_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomile_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomilt_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomilt_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_ucomineq_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_ucomineq_sh(a, b);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_load_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_load_ph(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_load_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_load_ph(addr_of!(a).cast());
+ assert_eq_m256h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_load_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_load_ph(addr_of!(a).cast());
+ assert_eq_m512h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_load_sh(addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let src = _mm_set_sh(2.);
+ let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
+ assert_eq_m128h(src, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_load_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
+ assert_eq_m128h(a, b);
+ let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_loadu_ph() {
+ let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
+ let r = _mm_loadu_ph(array.as_ptr());
+ let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ ];
+ let r = _mm256_loadu_ph(array.as_ptr());
+ let e = _mm256_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_loadu_ph() {
+ let array = [
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ ];
+ let r = _mm512_loadu_ph(array.as_ptr());
+ let e = _mm512_setr_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_move_sh(a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let src = _mm_set_sh(10.0);
+ let r = _mm_mask_move_sh(src, 0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_move_sh() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_sh(9.0);
+ let r = _mm_maskz_move_sh(0, a, b);
+ let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_store_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_store_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let mut b = _mm256_setzero_ph();
+ _mm256_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m256h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_store_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let mut b = _mm512_setzero_ph();
+ _mm512_store_ph(addr_of_mut!(b).cast(), a);
+ assert_eq_m512h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_store_sh(addr_of_mut!(b).cast(), a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_store_sh() {
+ let a = _mm_set_sh(1.0);
+ let mut b = _mm_setzero_ph();
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
+ assert_eq_m128h(_mm_setzero_ph(), b);
+ _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
+ assert_eq_m128h(a, b);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_storeu_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let mut array = [0.0; 8];
+ _mm_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_storeu_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let mut array = [0.0; 16];
+ _mm256_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_storeu_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let mut array = [0.0; 32];
+ _mm512_storeu_ph(array.as_mut_ptr(), a);
+ assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_add_ph(a, b);
+ let e = _mm_set1_ph(9.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_add_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_add_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_add_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
assert_eq_m128h(r, e);
}

- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set_ph() {
- let r = _mm256_set_ph(
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_add_ph(a, b);
+ let e = _mm256_set1_ph(17.0);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_add_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_add_ph() {
+ let a = _mm256_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
);
- let e = _mm256_setr_ph(
+ let b = _mm256_set_ph(
16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
);
+ let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ );
assert_eq_m256h(r, e);
}

#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set_ph() {
- let r = _mm512_set_ph(
+ unsafe fn test_mm512_add_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
- let e = _mm512_setr_ph(
+ let b = _mm512_set_ph(
32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
3.0, 2.0, 1.0,
);
+ let r = _mm512_add_ph(a, b);
+ let e = _mm512_set1_ph(33.0);
assert_eq_m512h(r, e);
}

#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set_sh() {
- let r = _mm_set_sh(1.0);
- let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_set1_ph() {
- let r = _mm_set1_ph(1.0);
- let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_set1_ph() {
- let r = _mm256_set1_ph(1.0);
- let e = _mm256_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ unsafe fn test_mm512_mask_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_set1_ph() {
- let r = _mm512_set1_ph(1.0);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
);
assert_eq_m512h(r, e);
}

#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_setr_ph() {
- let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_maskz_add_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
+ assert_eq_m512h(r, e);
}

#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_setr_ph() {
- let r = _mm256_setr_ph(
+ unsafe fn test_mm512_add_round_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- let e = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
);
- assert_eq_m256h(r, e);
+ let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(33.0);
+ assert_eq_m512h(r, e);
}

#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_setr_ph() {
- let r = _mm512_setr_ph(
+ unsafe fn test_mm512_mask_add_round_ph() {
+ let a = _mm512_set_ph(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
31.0, 32.0,
);
- let e = _mm512_set_ph(
+ let b = _mm512_set_ph(
32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
3.0, 2.0, 1.0,
);
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
+ 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ );
assert_eq_m512h(r, e);
}

- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_setzero_ph() {
- let r = _mm_setzero_ph();
- let e = _mm_set1_ph(0.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_setzero_ph() {
- let r = _mm256_setzero_ph();
- let e = _mm256_set1_ph(0.0);
- assert_eq_m256h(r, e);
- }
-
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_setzero_ph() {
- let r = _mm512_setzero_ph();
- let e = _mm512_set1_ph(0.0);
+ unsafe fn test_mm512_maskz_add_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
+ 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ );
assert_eq_m512h(r, e);
}

#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castsi128_ph() {
- let a = _mm_set1_epi16(0x3c00);
- let r = _mm_castsi128_ph(a);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castsi256_ph() {
- let a = _mm256_set1_epi16(0x3c00);
- let r = _mm256_castsi256_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castsi512_ph() {
- let a = _mm512_set1_epi16(0x3c00);
- let r = _mm512_castsi512_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_si128() {
- let a = _mm_set1_ph(1.0);
- let r = _mm_castph_si128(a);
- let e = _mm_set1_epi16(0x3c00);
- assert_eq_m128i(r, e);
+ unsafe fn test_mm_mask_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_si256() {
- let a = _mm256_set1_ph(1.0);
- let r = _mm256_castph_si256(a);
- let e = _mm256_set1_epi16(0x3c00);
- assert_eq_m256i(r, e);
+ unsafe fn test_mm_maskz_add_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_si512() {
- let a = _mm512_set1_ph(1.0);
- let r = _mm512_castph_si512(a);
- let e = _mm512_set1_epi16(0x3c00);
- assert_eq_m512i(r, e);
+ unsafe fn test_mm_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_add_sh(a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castps_ph() {
- let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
- let r = _mm_castps_ph(a);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm_mask_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_add_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_add_sh(src, 1, a, b);
+ let e = _mm_set_sh(3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castps_ph() {
- let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castps_ph(a);
- let e = _mm256_set1_ph(1.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_add_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_add_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_add_sh(1, a, b);
+ let e = _mm_set_sh(3.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castps_ph() {
- let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castps_ph(a);
- let e = _mm512_set1_ph(1.0);
- assert_eq_m512h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_sub_ph(a, b);
+ let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_ps() {
- let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
- let r = _mm_castph_ps(a);
- let e = _mm_set1_ps(1.0);
- assert_eq_m128(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_ps() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
- let r = _mm256_castph_ps(a);
- let e = _mm256_set1_ps(1.0);
- assert_eq_m256(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_sub_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_sub_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_ps() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
- let r = _mm512_castph_ps(a);
- let e = _mm512_set1_ps(1.0);
- assert_eq_m512(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_sub_ph(a, b);
+ let e = _mm256_set_ph(
+ -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
+ 15.0,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castpd_ph() {
- let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
- let r = _mm_castpd_ph(a);
- let e = _mm_set1_ph(1.0);
- assert_eq_m128h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castpd_ph() {
- let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
- let r = _mm256_castpd_ph(a);
- let e = _mm256_set1_ph(1.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_sub_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castpd_ph() {
- let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
- let r = _mm512_castpd_ph(a);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_ph(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_castph_pd() {
- let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
- let r = _mm_castph_pd(a);
- let e = _mm_set1_pd(1.0);
- assert_eq_m128d(r, e);
+ unsafe fn test_mm512_mask_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph_pd() {
- let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
- let r = _mm256_castph_pd(a);
- let e = _mm256_set1_pd(1.0);
- assert_eq_m256d(r, e);
+ unsafe fn test_mm512_maskz_sub_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph_pd() {
- let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
- let r = _mm512_castph_pd(a);
- let e = _mm512_set1_pd(1.0);
- assert_eq_m512d(r, e);
+ unsafe fn test_mm512_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
+ -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
+ 23.0, 25.0, 27.0, 29.0, 31.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph256_ph128() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ unsafe fn test_mm512_mask_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- let r = _mm256_castph256_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- assert_eq_m128h(r, e);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
+ 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph128() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ unsafe fn test_mm512_maskz_sub_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
);
- let r = _mm512_castph512_ph128(a);
- let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
+ 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(-1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph512_ph256() {
- let a = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
- 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
+ unsafe fn test_mm_mask_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let r = _mm512_castph512_ph256(a);
- let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- assert_eq_m256h(r, e);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_castph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_castph128_ph256(a);
- assert_eq_m128h(_mm256_castph256_ph128(r), a);
+ unsafe fn test_mm_maskz_sub_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_castph128_ph512(a);
- assert_eq_m128h(_mm512_castph512_ph128(r), a);
+ unsafe fn test_mm_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_sub_sh(a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_castph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- let r = _mm512_castph256_ph512(a);
- assert_eq_m256h(_mm512_castph512_ph256(r), a);
+ unsafe fn test_mm_mask_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_sub_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_sub_sh(src, 1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm256_zextph128_ph256() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm256_zextph128_ph256(a);
- let e = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_sub_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_sub_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_sub_sh(1, a, b);
+ let e = _mm_set_sh(-1.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph128_ph512() {
- let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
- let r = _mm512_zextph128_ph512(a);
- let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- );
- assert_eq_m512h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_mul_ph(a, b);
+ let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_zextph256_ph512() {
- let a = _mm256_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
- );
- let r = _mm512_zextph256_ph512(a);
- let e = _mm512_setr_ph(
- 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
- 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
- );
- assert_eq_m512h(r, e);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
+ let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmp_round_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_mul_ph() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ let r = _mm_maskz_mul_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmp_round_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
- assert_eq!(r, 0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_mul_ph(a, b);
+ let e = _mm256_set_ph(
+ 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
+ 30.0, 16.0,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmp_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 1);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let src = _mm256_set_ph(
+ 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ );
+ let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmp_sh_mask() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
- assert_eq!(r, 0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_mul_ph() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let b = _mm256_set_ph(
+ 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
+ );
+ let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ );
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_mul_ph(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comi_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mask_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comieq_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comieq_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_mul_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comige_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set_ph(
+ 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
+ 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
+ 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_comigt_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_mask_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let src = _mm512_set_ph(
+ 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
+ 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ );
+ let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
+ 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comile_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_comile_sh(a, b);
- assert_eq!(r, 1);
+ unsafe fn test_mm512_maskz_mul_round_ph() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let b = _mm512_set_ph(
+ 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
+ 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
+ 3.0, 2.0, 1.0,
+ );
+ let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
+ 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comilt_sh() {
+ unsafe fn test_mm_mul_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comilt_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_comineq_sh() {
+ unsafe fn test_mm_mask_mul_round_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_comineq_sh(a, b);
- assert_eq!(r, 1);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomieq_sh() {
+ unsafe fn test_mm_maskz_mul_round_sh() {
let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomieq_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomige_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomige_sh(a, b);
- assert_eq!(r, 1);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomigt_sh() {
- let a = _mm_set_sh(2.0);
- let b = _mm_set_sh(1.0);
- let r = _mm_ucomigt_sh(a, b);
- assert_eq!(r, 1);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomile_sh() {
+ unsafe fn test_mm_mul_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomile_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_mul_sh(a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomilt_sh() {
+ unsafe fn test_mm_mask_mul_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomilt_sh(a, b);
- assert_eq!(r, 1);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_mul_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_mul_sh(src, 1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_ucomineq_sh() {
+ unsafe fn test_mm_maskz_mul_sh() {
let a = _mm_set_sh(1.0);
let b = _mm_set_sh(2.0);
- let r = _mm_ucomineq_sh(a, b);
- assert_eq!(r, 1);
+ let r = _mm_maskz_mul_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_mul_sh(1, a, b);
+ let e = _mm_set_sh(2.0);
+ assert_eq_m128h(r, e);
}
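+ // The division tests use uniform `set1` inputs: 1.0 / 2.0 == 0.5 is exactly
+ // representable in half precision, so the expected vectors stay exact.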
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_load_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_load_ph(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ unsafe fn test_mm_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_div_ph(a, b);
+ let e = _mm_set1_ph(0.5);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_load_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_load_ph(addr_of!(a).cast());
- assert_eq_m256h(a, b);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_load_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_load_ph(addr_of!(a).cast());
- assert_eq_m512h(a, b);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_load_sh(addr_of!(a).cast());
- assert_eq_m128h(a, b);
+ unsafe fn test_mm_mask_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
+ let r = _mm_mask_div_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_load_sh() {
- let a = _mm_set_sh(1.0);
- let src = _mm_set_sh(2.);
- let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
- assert_eq_m128h(src, b);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_div_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let r = _mm_maskz_div_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_load_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
- assert_eq_m128h(a, b);
- let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
- assert_eq_m128h(_mm_setzero_ph(), b);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_div_ph(a, b);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_loadu_ph() {
- let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
- let r = _mm_loadu_ph(array.as_ptr());
- let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm256_mask_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let src = _mm256_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0,
+ );
+ let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_loadu_ph() {
- let array = [
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- ];
- let r = _mm256_loadu_ph(array.as_ptr());
- let e = _mm256_setr_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_div_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_loadu_ph() {
- let array = [
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- ];
- let r = _mm512_loadu_ph(array.as_ptr());
- let e = _mm512_setr_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
+ unsafe fn test_mm512_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_ph(a, b);
+ let e = _mm512_set1_ph(0.5);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_move_sh(a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_mask_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let src = _mm512_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
+ );
+ let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let src = _mm_set_sh(10.0);
- let r = _mm_mask_move_sh(src, 0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_maskz_div_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_move_sh() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_sh(9.0);
- let r = _mm_maskz_move_sh(0, a, b);
- let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_store_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut b = _mm_setzero_ph();
- _mm_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
+ unsafe fn test_mm512_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_store_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let src = _mm512_set_ph(
+ 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
+ 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
+ 33.0, 34.0, 35.0,
);
- let mut b = _mm256_setzero_ph();
- _mm256_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m256h(a, b);
+ let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ );
+ let e = _mm512_set_ph(
+ 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_store_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
+ unsafe fn test_mm512_maskz_div_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
);
- let mut b = _mm512_setzero_ph();
- _mm512_store_ph(addr_of_mut!(b).cast(), a);
- assert_eq_m512h(a, b);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_store_sh() {
+ unsafe fn test_mm_div_round_sh() {
let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_store_sh(addr_of_mut!(b).cast(), a);
- assert_eq_m128h(a, b);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_store_sh() {
+ unsafe fn test_mm_mask_div_round_sh() {
let a = _mm_set_sh(1.0);
- let mut b = _mm_setzero_ph();
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
- assert_eq_m128h(_mm_setzero_ph(), b);
- _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
- assert_eq_m128h(a, b);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_storeu_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let mut array = [0.0; 8];
- _mm_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_div_round_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r =
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_storeu_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let mut array = [0.0; 16];
- _mm256_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_div_sh(a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_storeu_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let mut array = [0.0; 32];
- _mm512_storeu_ph(array.as_mut_ptr(), a);
- assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+ unsafe fn test_mm_mask_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let src = _mm_set_sh(4.0);
+ let r = _mm_mask_div_sh(src, 0, a, b);
+ let e = _mm_set_sh(4.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_div_sh(src, 1, a, b);
+ let e = _mm_set_sh(0.5);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_add_ph(a, b);
- let e = _mm_set1_ph(9.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_div_sh() {
+ let a = _mm_set_sh(1.0);
+ let b = _mm_set_sh(2.0);
+ let r = _mm_maskz_div_sh(0, a, b);
+ let e = _mm_set_sh(0.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_div_sh(1, a, b);
+ let e = _mm_set_sh(0.5);
assert_eq_m128h(r, e);
}
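+ // The `_pch` intrinsics treat each vector as interleaved complex numbers made
+ // of (real, imaginary) `f16` pairs. `_mm_set1_pch(0.0, 1.0)` broadcasts `i`,
+ // and `i * i == -1 + 0i`, which is why the expected values alternate between
+ // -1.0 and 0.0. Mask bits select whole complex pairs, not individual lanes.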
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_add_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
+ unsafe fn test_mm_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_mul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_add_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_add_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
+ unsafe fn test_mm_mask_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_mul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_add_ph(a, b);
- let e = _mm256_set1_ph(17.0);
+ unsafe fn test_mm_maskz_mul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_mul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_mul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ unsafe fn test_mm256_mask_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
+ let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_add_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
+ unsafe fn test_mm256_maskz_mul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_mul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_add_ph(a, b);
- let e = _mm512_set1_ph(33.0);
+ unsafe fn test_mm512_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ unsafe fn test_mm512_maskz_mul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(33.0);
+ unsafe fn test_mm512_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
- 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_add_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_mul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
- 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
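+ // The scalar complex `_sch` intrinsics operate on the lowest (real, imaginary)
+ // pair only; elements 2..7 of the result are copied from `a`. With a zero mask
+ // bit, the masked form takes the low pair from `src` and the zero-masked form
+ // zeroes it, while the upper elements still come from `a`.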
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(3.0);
+ unsafe fn test_mm_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(3.0);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_maskz_mul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
let r =
- _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(3.0);
+ _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_add_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_add_sh(a, b);
- let e = _mm_set_sh(3.0);
+ unsafe fn test_mm_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_mul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_add_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_add_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_add_sh(src, 1, a, b);
- let e = _mm_set_sh(3.0);
+ unsafe fn test_mm_mask_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_mul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_add_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_add_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_add_sh(1, a, b);
- let e = _mm_set_sh(3.0);
+ unsafe fn test_mm_maskz_mul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_mul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_sub_ph(a, b);
- let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
+ unsafe fn test_mm_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_fmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
+ unsafe fn test_mm_mask_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_sub_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_sub_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
+ unsafe fn test_mm_maskz_fmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 1.0);
+ let r = _mm_maskz_fmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_sub_ph(a, b);
- let e = _mm256_set_ph(
- -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
- 15.0,
- );
+ unsafe fn test_mm256_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_fmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ unsafe fn test_mm256_mask_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
+ let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_sub_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
+ unsafe fn test_mm256_maskz_fmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_sub_ph(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
- );
+ unsafe fn test_mm512_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ unsafe fn test_mm512_maskz_fmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set_ph(
- -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
- -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
- 23.0, 25.0, 27.0, 29.0, 31.0,
- );
+ unsafe fn test_mm512_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
- 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sub_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_fmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 1.0);
+ let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
- 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(-1.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(-1.0);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm_maskz_fmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
let r =
- _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(-1.0);
+ _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_sub_sh(a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_fmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_sub_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_sub_sh(src, 1, a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_mask_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sub_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_sub_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_sub_sh(1, a, b);
- let e = _mm_set_sh(-1.0);
+ unsafe fn test_mm_maskz_fmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let r = _mm_maskz_fmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_mul_ph(a, b);
- let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
+ unsafe fn test_mm_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
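+ // a * conj(b) = (0 + 1i) * conj(0 - 1i) = (0 + 1i) * (0 + 1i) = -1 + 0i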
+ let r = _mm_cmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
- let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
+ unsafe fn test_mm_mask_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_mul_ph() {
- let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
- let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
- let r = _mm_maskz_mul_ph(0b01010101, a, b);
- let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
+ unsafe fn test_mm_maskz_cmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_cmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_mul_ph(a, b);
- let e = _mm256_set_ph(
- 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
- 30.0, 16.0,
- );
+ unsafe fn test_mm256_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_cmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let src = _mm256_set_ph(
- 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
+ unsafe fn test_mm256_mask_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
+ let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_mul_ph() {
- let a = _mm256_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- );
- let b = _mm256_set_ph(
- 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
- );
- let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
+ unsafe fn test_mm256_maskz_cmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_mul_ph(a, b);
- let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
- );
+ unsafe fn test_mm512_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ unsafe fn test_mm512_maskz_cmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set_ph(
- 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
- 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
- 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
- );
+ unsafe fn test_mm512_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let src = _mm512_set_ph(
- 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
- 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
+ unsafe fn test_mm512_mask_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
- 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_round_ph() {
- let a = _mm512_set_ph(
- 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
- 31.0, 32.0,
- );
- let b = _mm512_set_ph(
- 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
- 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
- 3.0, 2.0, 1.0,
- );
- let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_cmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
- 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_mask_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r =
- _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_maskz_cmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_cmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_mul_sh(a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_mul_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_mul_sh(src, 1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_mask_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_mul_sh(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_mul_sh(1, a, b);
- let e = _mm_set_sh(2.0);
+ unsafe fn test_mm_maskz_cmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let r = _mm_div_ph(a, b);
- let e = _mm_set1_ph(0.5);
+ unsafe fn test_mm_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_fcmul_pch(a, b);
+ let e = _mm_set1_pch(-1.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
- let r = _mm_mask_div_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
+ unsafe fn test_mm_mask_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
+ let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_div_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let r = _mm_maskz_div_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ unsafe fn test_mm_maskz_fcmul_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, -1.0);
+ let r = _mm_maskz_fcmul_pch(0b0101, a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let r = _mm256_div_ph(a, b);
- let e = _mm256_set1_ph(0.5);
+ unsafe fn test_mm256_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_fcmul_pch(a, b);
+ let e = _mm256_set1_pch(-1.0, 0.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let src = _mm256_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0,
+ unsafe fn test_mm256_mask_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let src = _mm256_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
+ let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_div_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
- let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ unsafe fn test_mm256_maskz_fcmul_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, -1.0);
+ let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
+ let e = _mm256_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_ph(a, b);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_pch(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
+ unsafe fn test_mm512_mask_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ unsafe fn test_mm512_maskz_fcmul_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pch(-1.0, 0.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let src = _mm512_set_ph(
- 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
- 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
- 33.0, 34.0, 35.0,
+ unsafe fn test_mm512_mask_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let src = _mm512_setr_ph(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
+ 32.0, 33.0,
);
- let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
- 0b01010101010101010101010101010101,
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
- 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_div_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ unsafe fn test_mm512_maskz_fcmul_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, -1.0);
+ let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
a,
b,
);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ let e = _mm512_setr_ph(
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_sch(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_mask_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fcmul_sch(src, 0, a, b);
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_round_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_set_sh(0.0);
- assert_eq_m128h(r, e);
- let r =
- _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_maskz_fcmul_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_maskz_fcmul_sch(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_div_sh(a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let src = _mm_set_sh(4.0);
- let r = _mm_mask_div_sh(src, 0, a, b);
- let e = _mm_set_sh(4.0);
- assert_eq_m128h(r, e);
- let r = _mm_mask_div_sh(src, 1, a, b);
- let e = _mm_set_sh(0.5);
+ unsafe fn test_mm_mask_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
+ let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_div_sh() {
- let a = _mm_set_sh(1.0);
- let b = _mm_set_sh(2.0);
- let r = _mm_maskz_div_sh(0, a, b);
- let e = _mm_set_sh(0.0);
+ unsafe fn test_mm_maskz_fcmul_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
+ let r =
+ _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_div_sh(1, a, b);
- let e = _mm_set_sh(0.5);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_abs_ph() {
+ let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
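+ // lane-wise absolute value clears the sign bit of each element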
+ let r = _mm_abs_ph(a);
+ let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mul_pch() {
+ unsafe fn test_mm256_abs_ph() {
+ let a = _mm256_set_ph(
+ -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
+ -14.0,
+ );
+ let r = _mm256_abs_ph(a);
+ let e = _mm256_set_ph(
+ 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_abs_ph() {
+ let a = _mm512_set_ph(
+ -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
+ -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
+ 27.0, -28.0, 29.0, -30.0,
+ );
+ let r = _mm512_abs_ph(a);
+ let e = _mm512_set_ph(
+ 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
+ 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
+ 29.0, 30.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_conj_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_mul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ let r = _mm_conj_pch(a);
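+ // conj(0 + 1i) = 0 - 1i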
+ let e = _mm_set1_pch(0.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_mul_pch() {
+ unsafe fn test_mm_mask_conj_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_mul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ let r = _mm_mask_conj_pch(src, 0b0101, a);
+ let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_mul_pch() {
+ unsafe fn test_mm_maskz_conj_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_mul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ let r = _mm_maskz_conj_pch(0b0101, a);
+ let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mul_pch() {
+ unsafe fn test_mm256_conj_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_mul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ let r = _mm256_conj_pch(a);
+ let e = _mm256_set1_pch(0.0, -1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_mul_pch() {
+ unsafe fn test_mm256_mask_conj_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
let src = _mm256_setr_ph(
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
);
- let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- );
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_mul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_mul_pch(0b01010101, a, b);
+ let r = _mm256_mask_conj_pch(src, 0b01010101, a);
let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
);
assert_eq_m256h(r, e);
}
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_mul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_conj_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let r = _mm256_maskz_conj_pch(0b01010101, a);
+ let e = _mm256_setr_ph(
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
);
- assert_eq_m512h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mul_round_pch() {
+ unsafe fn test_mm512_conj_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let r = _mm512_conj_pch(a);
+ let e = _mm512_set1_pch(0.0, -1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_mul_round_pch() {
+ unsafe fn test_mm512_mask_conj_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
let src = _mm512_setr_ph(
2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
32.0, 33.0,
);
- let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b0101010101010101,
- a,
- b,
- );
+ let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
+ 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
+ 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
33.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_mul_round_pch() {
+ unsafe fn test_mm512_maskz_conj_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
- a,
- b,
- );
+ let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r =
- _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_mul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_mul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_mul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_maskz_mul_sch(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_fmadd_pch() {
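+ // Complex FMA over interleaved (re, im) f16 pairs: (0+1i)*(0+2i) + (0+3i) = -2+3i.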
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_fmadd_pch(a, b, c);
+ let e = _mm_set1_pch(-2.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmul_pch() {
+ unsafe fn test_mm_mask_fmadd_pch() {
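+ // pch masks act per complex element (two f16 lanes); unselected elements keep a.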
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_fmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmul_pch() {
+ unsafe fn test_mm_mask3_fmadd_pch() {
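+ // The mask3 variant keeps c in unselected elements.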
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmul_pch() {
+ unsafe fn test_mm_maskz_fmadd_pch() {
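+ // The maskz variant zeroes unselected elements.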
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_fmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmul_pch() {
+ unsafe fn test_mm256_fmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_fmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_fmadd_pch(a, b, c);
+ let e = _mm256_set1_pch(-2.0, 3.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmul_pch() {
+ unsafe fn test_mm256_mask_fmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
+ let e = _mm256_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
);
- let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmul_pch() {
+ unsafe fn test_mm256_maskz_fmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmul_pch() {
+ unsafe fn test_mm512_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_fmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_fmadd_pch(a, b, c);
+ let e = _mm512_set1_pch(-2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmul_pch() {
+ unsafe fn test_mm512_mask_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
);
- let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmul_pch() {
+ unsafe fn test_mm512_maskz_fmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmul_round_pch() {
+ unsafe fn test_mm512_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r =
+ _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pch(-2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmul_round_pch() {
+ unsafe fn test_mm512_mask_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
0b0101010101010101,
+ b,
+ c,
+ );
+ let e = _mm512_setr_ph(
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b0101010101010101,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmul_round_pch() {
+ unsafe fn test_mm512_maskz_fmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b0101010101010101,
a,
b,
+ c,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmul_round_sch() {
+ unsafe fn test_mm_fmadd_sch() {
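+ // Scalar complex FMA on element 0 only; the upper six lanes are copied from a.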
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fmadd_sch(a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmul_round_sch() {
+ unsafe fn test_mm_mask_fmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fmadd_sch(a, 0, b, c);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_sch(a, 1, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmul_round_sch() {
+ unsafe fn test_mm_mask3_fmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r =
- _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmadd_sch(a, b, c, 0);
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_sch(a, b, c, 1);
+ let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fmadd_sch(0, a, b, c);
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_sch(1, a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmul_sch() {
+ unsafe fn test_mm_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_fmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmul_sch() {
+ unsafe fn test_mm_mask_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmul_sch() {
+ unsafe fn test_mm_mask3_fmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let r = _mm_maskz_fmul_sch(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_cmul_pch() {
+ unsafe fn test_mm_fcmadd_pch() {
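+ // Conjugate complex FMA, a*conj(b) + c: (0+1i)*(0-2i) + (0+3i) = 2+3i.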
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_cmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_fcmadd_pch(a, b, c);
+ let e = _mm_set1_pch(2.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_cmul_pch() {
+ unsafe fn test_mm_mask_fcmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_cmul_pch() {
+ unsafe fn test_mm_mask3_fcmadd_pch() {
let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_maskz_cmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_cmul_pch() {
+ unsafe fn test_mm_maskz_fcmadd_pch() {
+ let a = _mm_set1_pch(0.0, 1.0);
+ let b = _mm_set1_pch(0.0, 2.0);
+ let c = _mm_set1_pch(0.0, 3.0);
+ let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_cmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_fcmadd_pch(a, b, c);
+ let e = _mm256_set1_pch(2.0, 3.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_cmul_pch() {
+ unsafe fn test_mm256_mask_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
+ let e = _mm256_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
);
- let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fcmadd_pch() {
+ let a = _mm256_set1_pch(0.0, 1.0);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_cmul_pch() {
+ unsafe fn test_mm256_maskz_fcmadd_pch() {
let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
+ let b = _mm256_set1_pch(0.0, 2.0);
+ let c = _mm256_set1_pch(0.0, 3.0);
+ let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_cmul_pch() {
+ unsafe fn test_mm512_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_cmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_fcmadd_pch(a, b, c);
+ let e = _mm512_set1_pch(2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_cmul_pch() {
+ unsafe fn test_mm512_mask_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
+ 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
);
- let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fcmadd_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
+ 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_cmul_pch() {
+ unsafe fn test_mm512_maskz_fcmadd_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
+ 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_cmul_round_pch() {
+ unsafe fn test_mm512_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r =
+ _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pch(2.0, 3.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_cmul_round_pch() {
+ unsafe fn test_mm512_mask_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a,
0b0101010101010101,
+ b,
+ c,
+ );
+ let e = _mm512_setr_ph(
+ 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
+ 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fcmadd_round_pch() {
+ let a = _mm512_set1_pch(0.0, 1.0);
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b0101010101010101,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
+ 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_cmul_round_pch() {
+ unsafe fn test_mm512_maskz_fcmadd_round_pch() {
let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let b = _mm512_set1_pch(0.0, 2.0);
+ let c = _mm512_set1_pch(0.0, 3.0);
+ let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b0101010101010101,
a,
b,
+ c,
);
let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
+ 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmul_sch() {
+ unsafe fn test_mm_fcmadd_sch() {
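+ // Scalar conjugate complex FMA on element 0; the upper six lanes are copied from a.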
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_cmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fcmadd_sch(a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmul_sch() {
+ unsafe fn test_mm_mask_fcmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_cmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fcmadd_sch(a, 0, b, c);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fcmadd_sch(a, 1, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_cmul_sch() {
+ unsafe fn test_mm_mask3_fcmadd_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_maskz_cmul_sch(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
+ let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fcmadd_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fcmadd_sch(0, a, b, c);
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fcmadd_sch(1, a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_cmul_round_sch() {
+ unsafe fn test_mm_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_cmul_round_sch() {
+ unsafe fn test_mm_mask_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_cmul_round_sch() {
+ unsafe fn test_mm_mask3_fcmadd_round_sch() {
let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r =
- _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_fcmadd_round_sch() {
+ let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
+ let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
assert_eq_m128h(r, e);
+ let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_fcmul_pch(a, b);
- let e = _mm_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_fmadd_ph() {
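+ // Plain per-lane f16 FMA: 1.0 * 2.0 + 3.0 = 5.0.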
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fmadd_ph(a, b, c);
+ let e = _mm_set1_ph(5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
+ unsafe fn test_mm_mask_fmadd_ph() {
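+ // Unlike pch, ph masks select individual f16 lanes.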
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fcmul_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, -1.0);
- let r = _mm_maskz_fcmul_pch(0b0101, a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
+ unsafe fn test_mm_mask3_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_fcmul_pch(a, b);
- let e = _mm256_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm_maskz_fmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmadd_ph(a, b, c);
+ let e = _mm256_set1_ph(5.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
+ unsafe fn test_mm256_mask_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
- let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask3_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fcmul_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, -1.0);
- let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
- let e = _mm256_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_fcmul_pch(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmadd_ph(a, b, c);
+ let e = _mm512_set1_ph(5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ unsafe fn test_mm512_mask_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
+ 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
- let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask3_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
+ 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmul_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ unsafe fn test_mm512_maskz_fmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
+ 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_pch(-1.0, 0.0);
+ unsafe fn test_mm512_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
- );
- let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src,
- 0b0101010101010101,
+ unsafe fn test_mm512_mask_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
+ 0b01010101010101010101010101010101,
b,
+ c,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
- -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
- 33.0,
+ let e = _mm512_set_ph(
+ 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
+ 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmul_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, -1.0);
- let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_mask3_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
+ c,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
- -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
+ 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_fcmul_sch(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fcmul_sch(src, 0, a, b);
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- assert_eq_m128h(r, e);
+ unsafe fn test_mm512_maskz_fmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ b,
+ c,
+ );
+ let e = _mm512_set_ph(
+ 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
+ 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ );
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmul_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_maskz_fcmul_sch(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fmadd_sh() {
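+ // Scalar f16 FMA on lane 0; lanes 1..=7 pass through from a.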
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let c = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_fmadd_sh(a, b, c);
+ let e = _mm_setr_ph(5.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let c = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_fmadd_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
- let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_mask_fmadd_sh(a, 1, b, c);
+ let e = _mm_setr_ph(5.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmul_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
- let r =
- _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask3_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let c = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask3_fmadd_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_abs_ph() {
- let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
- let r = _mm_abs_ph(a);
- let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
+ let r = _mm_mask3_fmadd_sh(a, b, c, 1);
+ let e = _mm_setr_ph(5.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_abs_ph() {
- let a = _mm256_set_ph(
- -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
- -14.0,
- );
- let r = _mm256_abs_ph(a);
- let e = _mm256_set_ph(
- 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
- );
- assert_eq_m256h(r, e);
- }
-
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_abs_ph() {
- let a = _mm512_set_ph(
- -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
- -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
- 27.0, -28.0, 29.0, -30.0,
- );
- let r = _mm512_abs_ph(a);
- let e = _mm512_set_ph(
- 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
- 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
- 29.0, 30.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let r = _mm_conj_pch(a);
- let e = _mm_set1_pch(0.0, -1.0);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
- let r = _mm_mask_conj_pch(src, 0b0101, a);
- let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
+ unsafe fn test_mm_maskz_fmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let c = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_maskz_fmadd_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_conj_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let r = _mm_maskz_conj_pch(0b0101, a);
- let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
+ let r = _mm_maskz_fmadd_sh(1, a, b, c);
+ let e = _mm_setr_ph(5.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_conj_pch(a);
- let e = _mm256_set1_pch(0.0, -1.0);
- assert_eq_m256h(r, e);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let c = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(5.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let src = _mm256_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- );
- let r = _mm256_mask_conj_pch(src, 0b01010101, a);
- let e = _mm256_setr_ph(
- 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let c = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_conj_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let r = _mm256_maskz_conj_pch(0b01010101, a);
- let e = _mm256_setr_ph(
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 1, b, c,
);
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_conj_pch(a);
- let e = _mm512_set1_pch(0.0, -1.0);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(5.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let src = _mm512_setr_ph(
- 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
- 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
- 32.0, 33.0,
+ unsafe fn test_mm_mask3_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let c = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
);
- let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
- let e = _mm512_setr_ph(
- 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
- 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
- 33.0,
+ let e = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 1,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(5.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_conj_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
- let e = _mm512_setr_ph(
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
- 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
+ unsafe fn test_mm_maskz_fmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let c = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 1, a, b, c,
+ );
+ let e = _mm_setr_ph(5.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_fmadd_pch(a, b, c);
- let e = _mm_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm_fmsub_ph() {
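+ // Per-lane FMSUB: 1.0 * 2.0 - 3.0 = -1.0.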
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fmsub_ph(a, b, c);
+ let e = _mm_set1_ph(-1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
+ unsafe fn test_mm_mask_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
+ unsafe fn test_mm_mask3_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
+ unsafe fn test_mm_maskz_fmsub_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_fmadd_pch(a, b, c);
- let e = _mm256_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm256_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fmsub_ph(a, b, c);
+ let e = _mm256_set1_ph(-1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm256_mask_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm256_mask3_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
- let e = _mm256_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fmsub_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_fmadd_pch(a, b, c);
- let e = _mm512_set1_pch(-2.0, 3.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
- );
+ unsafe fn test_mm512_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmsub_ph(a, b, c);
+ let e = _mm512_set1_ph(-1.0);
assert_eq_m512h(r, e);
}
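+    // `__m512h` holds 32 half-precision lanes, so the 512-bit masked variants take
+    // a 32-bit `__mmask32` (the 0b0101... literals below are 32 bits wide).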
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm512_mask_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm512_mask3_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r =
- _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_pch(-2.0, 3.0);
+ unsafe fn test_mm512_maskz_fmsub_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ );
assert_eq_m512h(r, e);
}
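+    // The `_round` variants take the rounding mode as a const generic. These inputs
+    // produce exact results, so `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`
+    // yields the same expected values as the non-`_round` tests.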
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(-1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
b,
c,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
- -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
+ let e = _mm512_set_ph(
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask3_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
- -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
+ let e = _mm512_set_ph(
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_maskz_fmsub_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
c,
);
- let e = _mm512_setr_ph(
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
- -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
);
assert_eq_m512h(r, e);
}
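+    // Scalar `_sh` operations compute only lane 0; the remaining lanes are copied
+    // from `a` (or from `c` in the `mask3` variants), which is why the upper lanes
+    // of each expected vector mirror the corresponding source operand.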
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fmadd_sch(a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmsub_sh(a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fmadd_sch(a, 0, b, c);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmsub_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_sch(a, 1, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_mask_fmsub_sh(a, 1, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fmadd_sch(a, b, c, 0);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ unsafe fn test_mm_mask3_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmsub_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_sch(a, b, c, 1);
- let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fmsub_sh(a, b, c, 1);
+ let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fmadd_sch(0, a, b, c);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_maskz_fmsub_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmsub_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_sch(1, a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_maskz_fmsub_sh(1, a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask3_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_maskz_fmsub_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
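+    // `fnmadd` computes `-(a * b) + c` per lane: -(1.0 * 2.0) + 3.0 == 1.0, which
+    // happens to equal `a`, so masked and unmasked lanes coincide in the `mask`
+    // tests below.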
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_fcmadd_pch(a, b, c);
- let e = _mm_set1_pch(2.0, 3.0);
+ unsafe fn test_mm_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_fnmadd_ph(a, b, c);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
+ unsafe fn test_mm_mask_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
+ unsafe fn test_mm_mask3_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fcmadd_pch() {
- let a = _mm_set1_pch(0.0, 1.0);
- let b = _mm_set1_pch(0.0, 2.0);
- let c = _mm_set1_pch(0.0, 3.0);
- let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
+ unsafe fn test_mm_maskz_fnmadd_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(2.0);
+ let c = _mm_set1_ph(3.0);
+ let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_fcmadd_pch(a, b, c);
- let e = _mm256_set1_pch(2.0, 3.0);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_fnmadd_ph(a, b, c);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm256_mask_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
+ let e = _mm256_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm256_mask3_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
+ let e = _mm256_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fcmadd_pch() {
- let a = _mm256_set1_pch(0.0, 1.0);
- let b = _mm256_set1_pch(0.0, 2.0);
- let c = _mm256_set1_pch(0.0, 3.0);
- let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
- let e = _mm256_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm256_maskz_fnmadd_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(2.0);
+ let c = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
+ let e = _mm256_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_fcmadd_pch(a, b, c);
- let e = _mm512_set1_pch(2.0, 3.0);
+ unsafe fn test_mm512_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_fnmadd_ph(a, b, c);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
- 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ unsafe fn test_mm512_mask_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
- 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ unsafe fn test_mm512_mask3_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let e = _mm512_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmadd_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
- 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ unsafe fn test_mm512_maskz_fnmadd_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
+ unsafe fn test_mm512_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
let r =
- _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_pch(2.0, 3.0);
+ _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
b,
c,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
- 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
+ let e = _mm512_set_ph(
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask3_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b0101010101010101,
+ 0b01010101010101010101010101010101,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
- 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
+ let e = _mm512_set_ph(
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fcmadd_round_pch() {
- let a = _mm512_set1_pch(0.0, 1.0);
- let b = _mm512_set1_pch(0.0, 2.0);
- let c = _mm512_set1_pch(0.0, 3.0);
- let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b0101010101010101,
+ unsafe fn test_mm512_maskz_fnmadd_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(2.0);
+ let c = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
c,
);
- let e = _mm512_setr_ph(
- 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
- 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
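+    // In the scalar `fnmadd_sh` tests, -(1.0 * 2.0) + 3.0 == 1.0 == a[0], so the
+    // expected vector is identical whether the mask bit is set or clear.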
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fcmadd_sch(a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmadd_sh(a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fcmadd_sch(a, 0, b, c);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_mask_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fnmadd_sh(a, 0, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fcmadd_sch(a, 1, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_mask_fnmadd_sh(a, 1, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ unsafe fn test_mm_mask3_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
- let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
+ let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmadd_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fcmadd_sch(0, a, b, c);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_maskz_fnmadd_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmadd_sh(0, a, b, c);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fcmadd_sch(1, a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let r = _mm_maskz_fnmadd_sh(1, a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ unsafe fn test_mm_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
- let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask3_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
- let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
+ let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fcmadd_round_sch() {
- let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
- let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
- let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
- let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_maskz_fnmadd_round_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
+ let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
- let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
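+    // `fnmsub` computes `-(a * b) - c` per lane: -(1.0 * 2.0) - 3.0 == -5.0.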
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmadd_ph() {
+ unsafe fn test_mm_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_fmadd_ph(a, b, c);
- let e = _mm_set1_ph(5.0);
+ let r = _mm_fnmsub_ph(a, b, c);
+ let e = _mm_set1_ph(-5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmadd_ph() {
+ unsafe fn test_mm_mask_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
+ let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
+ let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmadd_ph() {
+ unsafe fn test_mm_mask3_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
+ let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
+ let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmadd_ph() {
+ unsafe fn test_mm_maskz_fnmsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
+ let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
+ let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmadd_ph() {
+ unsafe fn test_mm256_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmadd_ph(a, b, c);
- let e = _mm256_set1_ph(5.0);
+ let r = _mm256_fnmsub_ph(a, b, c);
+ let e = _mm256_set1_ph(-5.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmadd_ph() {
+ unsafe fn test_mm256_mask_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
let e = _mm256_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmadd_ph() {
+ unsafe fn test_mm256_mask3_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
let e = _mm256_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmadd_ph() {
+ unsafe fn test_mm256_maskz_fnmsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
let e = _mm256_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_ph() {
+ unsafe fn test_mm512_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmadd_ph(a, b, c);
- let e = _mm512_set1_ph(5.0);
+ let r = _mm512_fnmsub_ph(a, b, c);
+ let e = _mm512_set1_ph(-5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_ph() {
+ unsafe fn test_mm512_mask_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
let e = _mm512_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
- 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_ph() {
+ unsafe fn test_mm512_mask3_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
let e = _mm512_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
- 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_ph() {
+ unsafe fn test_mm512_maskz_fnmsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
let e = _mm512_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
- 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmadd_round_ph() {
+ unsafe fn test_mm512_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(5.0);
+ let r =
+ _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_ph(-5.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmadd_round_ph() {
+ unsafe fn test_mm512_mask_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
0b01010101010101010101010101010101,
b,
c,
);
let e = _mm512_set_ph(
- 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
- 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
+ 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmadd_round_ph() {
+ unsafe fn test_mm512_mask3_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
0b01010101010101010101010101010101,
);
let e = _mm512_set_ph(
- 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
- 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmadd_round_ph() {
+ unsafe fn test_mm512_maskz_fnmsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
c,
);
let e = _mm512_set_ph(
- 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
- 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_sh() {
+ unsafe fn test_mm_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmadd_sh(a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_fnmsub_sh(a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_sh() {
+ unsafe fn test_mm_mask_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmadd_sh(a, 0, b, c);
+ let r = _mm_mask_fnmsub_sh(a, 0, b, c);
let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_sh(a, 1, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_mask_fnmsub_sh(a, 1, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_sh() {
+ unsafe fn test_mm_mask3_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmadd_sh(a, b, c, 0);
+ let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_sh(a, b, c, 1);
- let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
+ let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_sh() {
+ unsafe fn test_mm_maskz_fnmsub_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmadd_sh(0, a, b, c);
+ let r = _mm_maskz_fnmsub_sh(0, a, b, c);
let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_sh(1, a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_maskz_fnmsub_sh(1, a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmadd_round_sh() {
+ unsafe fn test_mm_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmadd_round_sh() {
+ unsafe fn test_mm_mask_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, b, c,
);
let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 1, b, c,
);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmadd_round_sh() {
+ unsafe fn test_mm_mask3_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 0,
);
let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, b, c, 1,
);
- let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmadd_round_sh() {
+ unsafe fn test_mm_maskz_fnmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0, a, b, c,
);
let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
1, a, b, c,
);
- let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
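+    // `fmaddsub` alternates per lane: even lanes compute `a * b - c` (-1.0) and odd
+    // lanes compute `a * b + c` (5.0). The mask 0b00110011 keeps whole even/odd lane
+    // pairs, so every surviving pair shows both results.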
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmsub_ph() {
+ unsafe fn test_mm_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_fmsub_ph(a, b, c);
- let e = _mm_set1_ph(-1.0);
+ let r = _mm_fmaddsub_ph(a, b, c);
+ let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmsub_ph() {
+ unsafe fn test_mm_mask_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
+ let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
+ let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmsub_ph() {
+ unsafe fn test_mm_mask3_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
+ let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
+ let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmsub_ph() {
+ unsafe fn test_mm_maskz_fmaddsub_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
+ let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
+ let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmsub_ph() {
+ unsafe fn test_mm256_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmsub_ph(a, b, c);
- let e = _mm256_set1_ph(-1.0);
+ let r = _mm256_fmaddsub_ph(a, b, c);
+ let e = _mm256_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmsub_ph() {
+ unsafe fn test_mm256_mask_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
let e = _mm256_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmsub_ph() {
+ unsafe fn test_mm256_mask3_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
let e = _mm256_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmsub_ph() {
+ unsafe fn test_mm256_maskz_fmaddsub_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
let e = _mm256_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsub_ph() {
+ unsafe fn test_mm512_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsub_ph(a, b, c);
- let e = _mm512_set1_ph(-1.0);
+ let r = _mm512_fmaddsub_ph(a, b, c);
+ let e = _mm512_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsub_ph() {
+ unsafe fn test_mm512_mask_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
let e = _mm512_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsub_ph() {
+ unsafe fn test_mm512_mask3_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
let e = _mm512_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsub_ph() {
+ unsafe fn test_mm512_maskz_fmaddsub_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
let e = _mm512_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsub_round_ph() {
+ unsafe fn test_mm512_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(-1.0);
+ let r =
+ _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set_ph(
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsub_round_ph() {
+ unsafe fn test_mm512_mask_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
b,
c,
);
let e = _mm512_set_ph(
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
- 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
+ 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsub_round_ph() {
+ unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
- 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsub_round_ph() {
+ unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00110011001100110011001100110011,
a,
b,
c,
);
let e = _mm512_set_ph(
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
- 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmsub_sh(a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmsub_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fmsub_sh(a, 1, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmsub_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fmsub_sh(a, b, c, 1);
- let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmsub_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fmsub_sh(1, a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
- );
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
- );
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
- );
- let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
- );
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
- );
- let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fnmadd_ph() {
+ unsafe fn test_mm_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
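+        // fmsubadd mirrors fmaddsub: even-indexed lanes get a * b + c, odd-indexed a * b - c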
- let r = _mm_fnmadd_ph(a, b, c);
- let e = _mm_set1_ph(1.0);
+ let r = _mm_fmsubadd_ph(a, b, c);
+ let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fnmadd_ph() {
+ unsafe fn test_mm_mask_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
+ let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
+ let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fnmadd_ph() {
+ unsafe fn test_mm_mask3_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
+ let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
+ let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fnmadd_ph() {
+ unsafe fn test_mm_maskz_fmsubadd_ph() {
let a = _mm_set1_ph(1.0);
let b = _mm_set1_ph(2.0);
let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
+ let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fnmadd_ph() {
+ unsafe fn test_mm256_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_fnmadd_ph(a, b, c);
- let e = _mm256_set1_ph(1.0);
+ let r = _mm256_fmsubadd_ph(a, b, c);
+ let e = _mm256_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fnmadd_ph() {
+ unsafe fn test_mm256_mask_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
+ let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
let e = _mm256_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fnmadd_ph() {
+ unsafe fn test_mm256_mask3_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
+ let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
let e = _mm256_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fnmadd_ph() {
+ unsafe fn test_mm256_maskz_fmsubadd_ph() {
let a = _mm256_set1_ph(1.0);
let b = _mm256_set1_ph(2.0);
let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
+ let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
let e = _mm256_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmadd_ph() {
+ unsafe fn test_mm512_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_fnmadd_ph(a, b, c);
- let e = _mm512_set1_ph(1.0);
+ let r = _mm512_fmsubadd_ph(a, b, c);
+ let e = _mm512_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmadd_ph() {
+ unsafe fn test_mm512_mask_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
+ let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmadd_ph() {
+ unsafe fn test_mm512_mask3_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
+ let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmadd_ph() {
+ unsafe fn test_mm512_maskz_fmsubadd_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
+ let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmadd_round_ph() {
+ unsafe fn test_mm512_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
let r =
- _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(1.0);
+ _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set_ph(
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ );
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmadd_round_ph() {
+ unsafe fn test_mm512_mask_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
b,
c,
);
let e = _mm512_set_ph(
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmadd_round_ph() {
+ unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
b,
c,
- 0b01010101010101010101010101010101,
+ 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmadd_round_ph() {
+ unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
let a = _mm512_set1_ph(1.0);
let b = _mm512_set1_ph(2.0);
let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b01010101010101010101010101010101,
+ let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00110011001100110011001100110011,
a,
b,
c,
);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_rcp_ph() {
+ let a = _mm_set1_ph(2.0);
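+        // rcp is an approximate reciprocal; for this power-of-two input the result is exactly 0.5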
+ let r = _mm_rcp_ph(a);
+ let e = _mm_set1_ph(0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_rcp_ph() {
+ let a = _mm_set1_ph(2.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_rcp_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_rcp_ph() {
+ let a = _mm_set1_ph(2.0);
+ let r = _mm_maskz_rcp_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_rcp_ph(a);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_rcp_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_rcp_ph(a);
+ let e = _mm512_set1_ph(0.5);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmadd_sh(a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm512_mask_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
+ 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_rcp_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_rcp_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
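+        // scalar variant: lane 0 holds the reciprocal of b's lane 0, upper lanes are copied from a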
+ let r = _mm_rcp_sh(a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmadd_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_mask_rcp_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_rcp_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_fnmadd_sh(a, 1, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_mask_rcp_sh(src, 1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ unsafe fn test_mm_maskz_rcp_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_rcp_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
- let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_maskz_rcp_sh(1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmadd_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmadd_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_rsqrt_ph() {
+ let a = _mm_set1_ph(4.0);
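+        // rsqrt is an approximate reciprocal square root; 1/sqrt(4.0) comes out exactly 0.5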
+ let r = _mm_rsqrt_ph(a);
+ let e = _mm_set1_ph(0.5);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmadd_sh(1, a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_rsqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_rsqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_maskz_rsqrt_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
assert_eq_m128h(r, e);
}
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_rsqrt_ph(a);
+ let e = _mm256_set1_ph(0.5);
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_rsqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
+ let e = _mm256_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ );
+ assert_eq_m256h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_rsqrt_ph(a);
+ let e = _mm512_set1_ph(0.5);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
+ 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ );
+ assert_eq_m512h(r, e);
+ }
+
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
+ unsafe fn test_mm512_maskz_rsqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
+ 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
+ assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
- );
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
- );
- let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
+ unsafe fn test_mm_rsqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
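+        // lane 0 holds the reciprocal square root of b's lane 0, upper lanes are copied from a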
+ let r = _mm_rsqrt_sh(a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmadd_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
- );
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_mask_rsqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_rsqrt_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_mask_rsqrt_sh(src, 1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fnmsub_ph(a, b, c);
- let e = _mm_set1_ph(-5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_rsqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_maskz_rsqrt_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
- let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
+ let r = _mm_maskz_rsqrt_sh(1, a, b);
+ let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
- let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
+ unsafe fn test_mm_sqrt_ph() {
+ let a = _mm_set1_ph(4.0);
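+        // unlike rcp/rsqrt, sqrt is exactly rounded: sqrt(4.0) == 2.0 in every lane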
+ let r = _mm_sqrt_ph(a);
+ let e = _mm_set1_ph(2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fnmsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
- let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
+ unsafe fn test_mm_mask_sqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let src = _mm_set1_ph(1.0);
+ let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fnmsub_ph(a, b, c);
- let e = _mm256_set1_ph(-5.0);
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_sqrt_ph() {
+ let a = _mm_set1_ph(4.0);
+ let r = _mm_maskz_sqrt_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
- let e = _mm256_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- );
+ unsafe fn test_mm256_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_sqrt_ph(a);
+ let e = _mm256_set1_ph(2.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
+ unsafe fn test_mm256_mask_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let src = _mm256_set1_ph(1.0);
+ let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fnmsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
+ unsafe fn test_mm256_maskz_sqrt_ph() {
+ let a = _mm256_set1_ph(4.0);
+ let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fnmsub_ph(a, b, c);
- let e = _mm512_set1_ph(-5.0);
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
- let e = _mm512_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- );
+ unsafe fn test_mm512_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_sqrt_ph(a);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
+ unsafe fn test_mm512_mask_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
+ unsafe fn test_mm512_maskz_sqrt_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set1_ph(-5.0);
+ unsafe fn test_mm512_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
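+        // the rounding const selects round-to-nearest-even and suppresses all exceptions (SAE)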
+ let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a,
+ unsafe fn test_mm512_mask_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let src = _mm512_set1_ph(1.0);
+ let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
0b01010101010101010101010101010101,
- b,
- c,
- );
- let e = _mm512_set_ph(
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a,
- b,
- c,
- 0b01010101010101010101010101010101,
);
let e = _mm512_set_ph(
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
- 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fnmsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_sqrt_round_ph() {
+ let a = _mm512_set1_ph(4.0);
+ let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
- b,
- c,
);
let e = _mm512_set_ph(
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
- 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmsub_sh(a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmsub_sh(a, 0, b, c);
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmsub_sh(a, 1, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_sqrt_sh(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ unsafe fn test_mm_mask_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_sqrt_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
- let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_sqrt_sh(src, 1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmsub_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmsub_sh(0, a, b, c);
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmsub_sh(1, a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_maskz_sqrt_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_maskz_sqrt_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ let r = _mm_maskz_sqrt_sh(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 0, b, c,
- );
- let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, 1, b, c,
- );
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask3_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 0,
+ unsafe fn test_mm_mask_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a, b, c, 1,
+ let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_fnmsub_round_sh() {
- let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
- let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
- let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
- let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0, a, b, c,
- );
- let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 1, a, b, c,
- );
- let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
+ unsafe fn test_mm_maskz_sqrt_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
+ let r =
+ _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fmaddsub_ph(a, b, c);
- let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
+ let r =
+ _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
- let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
+ unsafe fn test_mm_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
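+        // max computes the per-lane maximum, so every lane is max(2.0, 1.0) = 2.0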
+ let r = _mm_max_ph(a, b);
+ let e = _mm_set1_ph(2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
- let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
+ unsafe fn test_mm_mask_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let src = _mm_set1_ph(3.0);
+ let r = _mm_mask_max_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmaddsub_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
- let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
+ unsafe fn test_mm_maskz_max_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_maskz_max_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmaddsub_ph(a, b, c);
- let e = _mm256_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- );
- assert_eq_m256h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
- let e = _mm256_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- );
+ unsafe fn test_mm256_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_max_ph(a, b);
+ let e = _mm256_set1_ph(2.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
+ unsafe fn test_mm256_mask_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let src = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmaddsub_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
+ unsafe fn test_mm256_maskz_max_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmaddsub_ph(a, b, c);
- let e = _mm512_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
- let e = _mm512_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- );
- assert_eq_m512h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
- let e = _mm512_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
- );
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_max_ph(a, b);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmaddsub_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
+ unsafe fn test_mm512_mask_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
+ 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ unsafe fn test_mm512_maskz_max_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
- 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- a,
- 0b00110011001100110011001100110011,
- b,
- c,
- );
- let e = _mm512_set_ph(
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
- );
+ unsafe fn test_mm512_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(2.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
a,
b,
- c,
- 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
- 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
+ 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
+ 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b00110011001100110011001100110011,
+ unsafe fn test_mm512_maskz_max_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
- c,
);
let e = _mm512_set_ph(
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
- 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
+ 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
+ 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
);
assert_eq_m512h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_fmsubadd_ph(a, b, c);
- let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
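+        // lane 0 is max(a[0], b[0]) = 2.0, upper lanes are copied from a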
+ let r = _mm_max_sh(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
- let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_max_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_max_sh(src, 1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
- #[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask3_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
- let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_max_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_max_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_max_sh(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
+ );
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
+ );
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_max_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r =
+ _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_fmsubadd_ph() {
- let a = _mm_set1_ph(1.0);
- let b = _mm_set1_ph(2.0);
- let c = _mm_set1_ph(3.0);
- let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
- let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
+ unsafe fn test_mm_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_min_ph(a, b);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_fmsubadd_ph(a, b, c);
- let e = _mm256_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_mask_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let src = _mm_set1_ph(3.0);
+ let r = _mm_mask_min_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
- let e = _mm256_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- );
- assert_eq_m256h(r, e);
+ unsafe fn test_mm_maskz_min_ph() {
+ let a = _mm_set1_ph(2.0);
+ let b = _mm_set1_ph(1.0);
+ let r = _mm_maskz_min_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask3_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
- let e = _mm256_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
- );
+ unsafe fn test_mm256_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_min_ph(a, b);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_fmsubadd_ph() {
- let a = _mm256_set1_ph(1.0);
- let b = _mm256_set1_ph(2.0);
- let c = _mm256_set1_ph(3.0);
- let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
+ unsafe fn test_mm256_mask_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let src = _mm256_set1_ph(3.0);
+ let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m256h(r, e);
}
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_fmsubadd_ph(a, b, c);
- let e = _mm512_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_min_ph() {
+ let a = _mm256_set1_ph(2.0);
+ let b = _mm256_set1_ph(1.0);
+ let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
+ let e = _mm256_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
- assert_eq_m512h(r, e);
+ assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
- let e = _mm512_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- );
+ unsafe fn test_mm512_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_min_ph(a, b);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
+ unsafe fn test_mm512_mask_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsubadd_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
+ unsafe fn test_mm512_maskz_min_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r =
- _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
- let e = _mm512_set_ph(
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
- );
+ unsafe fn test_mm512_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let src = _mm512_set1_ph(3.0);
+ let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
a,
- 0b00110011001100110011001100110011,
b,
- c,
);
let e = _mm512_set_ph(
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
- 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
+ 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
+ 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_min_round_ph() {
+ let a = _mm512_set1_ph(2.0);
+ let b = _mm512_set1_ph(1.0);
+ let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
a,
b,
- c,
- 0b00110011001100110011001100110011,
);
let e = _mm512_set_ph(
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
- 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
- let a = _mm512_set1_ph(1.0);
- let b = _mm512_set1_ph(2.0);
- let c = _mm512_set1_ph(3.0);
- let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- 0b00110011001100110011001100110011,
- a,
- b,
- c,
+ unsafe fn test_mm_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_min_sh(a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_min_sh(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_min_sh(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_min_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_min_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_min_sh(1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0, a, b,
);
- let e = _mm512_set_ph(
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
- 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
+ let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 1, a, b,
);
- assert_eq_m512h(r, e);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_min_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r =
+ _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
+ let r =
+ _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_rcp_ph() {
- let a = _mm_set1_ph(2.0);
- let r = _mm_rcp_ph(a);
- let e = _mm_set1_ph(0.5);
+ unsafe fn test_mm_getexp_ph() {
+ let a = _mm_set1_ph(3.0);
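+ // getexp extracts the exponent as a float: floor(log2(|3.0|)) == 1.0.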
+ let r = _mm_getexp_ph(a);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_rcp_ph() {
- let a = _mm_set1_ph(2.0);
- let src = _mm_set1_ph(1.0);
- let r = _mm_mask_rcp_ph(src, 0b01010101, a);
- let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
+ unsafe fn test_mm_mask_getexp_ph() {
+ let a = _mm_set1_ph(3.0);
+ let src = _mm_set1_ph(4.0);
+ let r = _mm_mask_getexp_ph(src, 0b01010101, a);
+ let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_rcp_ph() {
- let a = _mm_set1_ph(2.0);
- let r = _mm_maskz_rcp_ph(0b01010101, a);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ unsafe fn test_mm_maskz_getexp_ph() {
+ let a = _mm_set1_ph(3.0);
+ let r = _mm_maskz_getexp_ph(0b01010101, a);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_rcp_ph() {
- let a = _mm256_set1_ph(2.0);
- let r = _mm256_rcp_ph(a);
- let e = _mm256_set1_ph(0.5);
+ unsafe fn test_mm256_getexp_ph() {
+ let a = _mm256_set1_ph(3.0);
+ let r = _mm256_getexp_ph(a);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_rcp_ph() {
- let a = _mm256_set1_ph(2.0);
- let src = _mm256_set1_ph(1.0);
- let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
+ unsafe fn test_mm256_mask_getexp_ph() {
+ let a = _mm256_set1_ph(3.0);
+ let src = _mm256_set1_ph(4.0);
+ let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_rcp_ph() {
- let a = _mm256_set1_ph(2.0);
- let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
+ unsafe fn test_mm256_maskz_getexp_ph() {
+ let a = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_getexp_ph(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_rcp_ph() {
- let a = _mm512_set1_ph(2.0);
- let r = _mm512_rcp_ph(a);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_getexp_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let r = _mm512_getexp_ph(a);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_rcp_ph() {
- let a = _mm512_set1_ph(2.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_mask_getexp_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let src = _mm512_set1_ph(4.0);
+ let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
- 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
+ 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_getexp_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_getexp_round_ph() {
+ let a = _mm512_set1_ph(3.0);
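+ // _MM_FROUND_NO_EXC (SAE) only suppresses exception reporting; the numeric
+ // result matches the non-round form.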
+ let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set1_ph(1.0);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_getexp_round_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let src = _mm512_set1_ph(4.0);
+ let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_ph(
+ 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
+ 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_getexp_round_ph() {
+ let a = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_ph(
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_rcp_ph() {
- let a = _mm512_set1_ph(2.0);
- let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
- let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
- );
- assert_eq_m512h(r, e);
+ unsafe fn test_mm_getexp_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
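+ // Lane 0 holds getexp(b[0]) == floor(log2(3.0)) == 1.0; lanes 1..8 are copied from a.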
+ let r = _mm_getexp_sh(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_getexp_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getexp_sh(src, 0, a, b);
+ let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_getexp_sh(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_getexp_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getexp_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_getexp_sh(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_rcp_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_rcp_sh(a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_getexp_round_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_rcp_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_rcp_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_getexp_round_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b);
+ let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_rcp_sh(src, 1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_rcp_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_maskz_rcp_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_getexp_round_sh() {
+ let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_rcp_sh(1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_rsqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_rsqrt_ph(a);
- let e = _mm_set1_ph(0.5);
+ unsafe fn test_mm_getmant_ph() {
+ let a = _mm_set1_ph(10.0);
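+ // getmant extracts the mantissa normalized into [0.75, 1.5) per
+ // _MM_MANT_NORM_P75_1P5: 10.0 == 1.25 * 2^3, so every lane yields 1.25.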
+ let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
+ let e = _mm_set1_ph(1.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_rsqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let src = _mm_set1_ph(1.0);
- let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
- let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
+ unsafe fn test_mm_mask_getmant_ph() {
+ let a = _mm_set1_ph(10.0);
+ let src = _mm_set1_ph(20.0);
+ let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a);
+ let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_rsqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_maskz_rsqrt_ph(0b01010101, a);
- let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
+ unsafe fn test_mm_maskz_getmant_ph() {
+ let a = _mm_set1_ph(10.0);
+ let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a);
+ let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_rsqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_rsqrt_ph(a);
- let e = _mm256_set1_ph(0.5);
+ unsafe fn test_mm256_getmant_ph() {
+ let a = _mm256_set1_ph(10.0);
+ let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
+ let e = _mm256_set1_ph(1.25);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_rsqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let src = _mm256_set1_ph(1.0);
- let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
+ unsafe fn test_mm256_mask_getmant_ph() {
+ let a = _mm256_set1_ph(10.0);
+ let src = _mm256_set1_ph(20.0);
+ let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ src,
+ 0b0101010101010101,
+ a,
+ );
let e = _mm256_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_rsqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
+ unsafe fn test_mm256_maskz_getmant_ph() {
+ let a = _mm256_set1_ph(10.0);
+ let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ 0b0101010101010101,
+ a,
+ );
let e = _mm256_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_rsqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_rsqrt_ph(a);
- let e = _mm512_set1_ph(0.5);
+ unsafe fn test_mm512_getmant_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
+ let e = _mm512_set1_ph(1.25);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_rsqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_mask_getmant_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let src = _mm512_set1_ph(20.0);
+ let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
- 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_rsqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_maskz_getmant_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
- 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_rsqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_rsqrt_sh(a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm512_getmant_round_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r =
+ _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
+ a,
+ );
+ let e = _mm512_set1_ph(1.25);
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_getmant_round_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let src = _mm512_set1_ph(20.0);
+ let r = _mm512_mask_getmant_round_ph::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
+ 20.0, 1.25, 20.0, 1.25,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_getmant_round_ph() {
+ let a = _mm512_set1_ph(10.0);
+ let r = _mm512_maskz_getmant_round_ph::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_ph(
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
+ );
+ assert_eq_m512h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_getmant_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_rsqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_rsqrt_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_getmant_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b);
+ let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_rsqrt_sh(src, 1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_rsqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_maskz_rsqrt_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_getmant_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_maskz_rsqrt_sh(1, a, b);
- let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_getmant_round_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
+ a, b,
+ );
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_getmant_round_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(src, 0, a, b);
+ let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(src, 1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_getmant_round_sh() {
+ let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_maskz_getmant_round_sh::<
+ _MM_MANT_NORM_P75_1P5,
+ _MM_MANT_SIGN_NAN,
+ _MM_FROUND_NO_EXC,
+ >(1, a, b);
+ let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_sqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_sqrt_ph(a);
- let e = _mm_set1_ph(2.0);
+ unsafe fn test_mm_roundscale_ph() {
+ let a = _mm_set1_ph(1.1);
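+ // roundscale computes 2^-M * round(2^M * x), with M taken from IMM8[7:4];
+ // IMM8 == 0 therefore rounds to the nearest integer, so 1.1 -> 1.0.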
+ let r = _mm_roundscale_ph::<0>(a);
+ let e = _mm_set1_ph(1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_sqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let src = _mm_set1_ph(1.0);
- let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
- let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
+ unsafe fn test_mm_mask_roundscale_ph() {
+ let a = _mm_set1_ph(1.1);
+ let src = _mm_set1_ph(2.0);
+ let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a);
+ let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_sqrt_ph() {
- let a = _mm_set1_ph(4.0);
- let r = _mm_maskz_sqrt_ph(0b01010101, a);
- let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
+ unsafe fn test_mm_maskz_roundscale_ph() {
+ let a = _mm_set1_ph(1.1);
+ let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a);
+ let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_sqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_sqrt_ph(a);
- let e = _mm256_set1_ph(2.0);
+ unsafe fn test_mm256_roundscale_ph() {
+ let a = _mm256_set1_ph(1.1);
+ let r = _mm256_roundscale_ph::<0>(a);
+ let e = _mm256_set1_ph(1.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_sqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let src = _mm256_set1_ph(1.0);
- let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
+ unsafe fn test_mm256_mask_roundscale_ph() {
+ let a = _mm256_set1_ph(1.1);
+ let src = _mm256_set1_ph(2.0);
+ let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_sqrt_ph() {
- let a = _mm256_set1_ph(4.0);
- let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
+ unsafe fn test_mm256_maskz_roundscale_ph() {
+ let a = _mm256_set1_ph(1.1);
+ let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_sqrt_ph(a);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_roundscale_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_roundscale_ph::<0>(a);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_mask_roundscale_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
- 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sqrt_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
+ unsafe fn test_mm512_maskz_roundscale_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_sqrt_round_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_roundscale_round_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set1_ph(1.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_sqrt_round_ph() {
- let a = _mm512_set1_ph(4.0);
- let src = _mm512_set1_ph(1.0);
- let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_roundscale_round_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
src,
0b01010101010101010101010101010101,
a,
);
let e = _mm512_set_ph(
- 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
- 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
+ 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_sqrt_round_ph() {
- let a = _mm512_set1_ph(4.0);
- let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_roundscale_round_ph() {
+ let a = _mm512_set1_ph(1.1);
+ let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
0b01010101010101010101010101010101,
a,
);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
+ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_sqrt_sh(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_roundscale_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_roundscale_sh::<0>(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_sqrt_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_roundscale_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_sqrt_sh(src, 1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sqrt_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_maskz_sqrt_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- assert_eq_m128h(r, e);
- let r = _mm_maskz_sqrt_sh(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_roundscale_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_roundscale_sh::<0>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- }
-
- #[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_sqrt_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_roundscale_sh::<0>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_sqrt_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 0, a, b,
- );
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_roundscale_round_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
- src, 1, a, b,
- );
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_roundscale_round_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b);
+ let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
+ assert_eq_m128h(r, e);
+ let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_sqrt_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
- let r =
- _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_roundscale_round_sh() {
+ let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r =
- _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b);
+ let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_max_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_max_ph(a, b);
- let e = _mm_set1_ph(2.0);
+ unsafe fn test_mm_scalef_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(3.0);
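+ // scalef computes a * 2^floor(b): 1.0 * 2^3 == 8.0 in every lane.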
+ let r = _mm_scalef_ph(a, b);
+ let e = _mm_set1_ph(8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_max_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let src = _mm_set1_ph(3.0);
- let r = _mm_mask_max_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0);
+ unsafe fn test_mm_mask_scalef_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(3.0);
+ let src = _mm_set1_ph(2.0);
+ let r = _mm_mask_scalef_ph(src, 0b01010101, a, b);
+ let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_max_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_maskz_max_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
+ unsafe fn test_mm_maskz_scalef_ph() {
+ let a = _mm_set1_ph(1.0);
+ let b = _mm_set1_ph(3.0);
+ let r = _mm_maskz_scalef_ph(0b01010101, a, b);
+ let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_max_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_max_ph(a, b);
- let e = _mm256_set1_ph(2.0);
+ unsafe fn test_mm256_scalef_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(3.0);
+ let r = _mm256_scalef_ph(a, b);
+ let e = _mm256_set1_ph(8.0);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_max_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let src = _mm256_set1_ph(3.0);
- let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b);
+ unsafe fn test_mm256_mask_scalef_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(3.0);
+ let src = _mm256_set1_ph(2.0);
+ let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
+ 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_max_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
+ unsafe fn test_mm256_maskz_scalef_ph() {
+ let a = _mm256_set1_ph(1.0);
+ let b = _mm256_set1_ph(3.0);
+ let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b);
let e = _mm256_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_max_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_max_ph(a, b);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_scalef_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(3.0);
+ let r = _mm512_scalef_ph(a, b);
+ let e = _mm512_set1_ph(8.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_max_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_mask_scalef_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(3.0);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
- 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
+ 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
+ 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_max_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_maskz_scalef_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
+ 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_max_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(2.0);
+ unsafe fn test_mm512_scalef_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(3.0);
+ let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_ph(8.0);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_max_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_scalef_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(3.0);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src,
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
- 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
+ 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
+ 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_max_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_scalef_round_ph() {
+ let a = _mm512_set1_ph(1.0);
+ let b = _mm512_set1_ph(3.0);
+ let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b01010101010101010101010101010101,
a,
b,
);
let e = _mm512_set_ph(
- 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
- 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
+ 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
+ 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_max_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_max_sh(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_scalef_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let r = _mm_scalef_sh(a, b);
+ let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_max_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_max_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_scalef_sh() {
+ let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
+ let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
+ let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
+ let r = _mm_mask_scalef_sh(src, 0, a, b);
+ let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
assert_eq_m128h(r, e);
- let r = _mm_mask_max_sh(src, 1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_scalef_sh(src, 1, a, b);
+ let e = _mm_setr_ph(8.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_max_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_maskz_max_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_scalef_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(3.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_scalef_sh(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_max_sh(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_scalef_sh(1, a, b);
+ let e = _mm_setr_ph(8.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_max_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_scalef_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(3.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_setr_ph(8.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_max_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_scalef_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(3.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(2.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 0, a, b,
);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
src, 1, a, b,
);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(8.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_max_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ unsafe fn test_mm_maskz_scalef_round_sh() {
+ let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(3.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
let r =
- _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
let r =
- _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
+ let e = _mm_setr_ph(8.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_min_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_min_ph(a, b);
- let e = _mm_set1_ph(1.0);
+ unsafe fn test_mm_reduce_ph() {
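+ // VREDUCEPH computes x - round(x * 2^M) / 2^M, where M = imm8[7:4] is the
+ // number of fraction bits kept and imm8[1:0] picks the rounding mode. Here
+ // imm8 = 16 | _MM_FROUND_TO_ZERO gives M = 1 with truncation:
+ // 1.25 - trunc(1.25 * 2) / 2 = 0.25.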
+ let a = _mm_set1_ph(1.25);
+ let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
+ let e = _mm_set1_ph(0.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_mask_min_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let src = _mm_set1_ph(3.0);
- let r = _mm_mask_min_ph(src, 0b01010101, a, b);
- let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
+ unsafe fn test_mm_mask_reduce_ph() {
+ let a = _mm_set1_ph(1.25);
+ let src = _mm_set1_ph(2.0);
+ let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a);
+ let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm_maskz_min_ph() {
- let a = _mm_set1_ph(2.0);
- let b = _mm_set1_ph(1.0);
- let r = _mm_maskz_min_ph(0b01010101, a, b);
- let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
+ unsafe fn test_mm_maskz_reduce_ph() {
+ let a = _mm_set1_ph(1.25);
+ let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a);
+ let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_min_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_min_ph(a, b);
- let e = _mm256_set1_ph(1.0);
+ unsafe fn test_mm256_reduce_ph() {
+ let a = _mm256_set1_ph(1.25);
+ let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
+ let e = _mm256_set1_ph(0.25);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_mask_min_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let src = _mm256_set1_ph(3.0);
- let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b);
+ unsafe fn test_mm256_mask_reduce_ph() {
+ let a = _mm256_set1_ph(1.25);
+ let src = _mm256_set1_ph(2.0);
+ let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a);
let e = _mm256_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16,avx512vl")]
- unsafe fn test_mm256_maskz_min_ph() {
- let a = _mm256_set1_ph(2.0);
- let b = _mm256_set1_ph(1.0);
- let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
+ unsafe fn test_mm256_maskz_reduce_ph() {
+ let a = _mm256_set1_ph(1.25);
+ let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a);
let e = _mm256_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
);
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_min_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_min_ph(a, b);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_reduce_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
+ let e = _mm512_set1_ph(0.25);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_min_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_mask_reduce_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_min_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
+ unsafe fn test_mm512_maskz_reduce_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_min_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm512_set1_ph(1.0);
+ unsafe fn test_mm512_reduce_round_ph() {
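+ // The round variant takes SAE as a second const parameter;
+ // _MM_FROUND_NO_EXC evaluates without raising floating-point exceptions.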
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set1_ph(0.25);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_mask_min_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let src = _mm512_set1_ph(3.0);
- let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_mask_reduce_round_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let src = _mm512_set1_ph(2.0);
+ let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
src,
0b01010101010101010101010101010101,
a,
- b,
);
let e = _mm512_set_ph(
- 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
- 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
+ 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm512_maskz_min_round_ph() {
- let a = _mm512_set1_ph(2.0);
- let b = _mm512_set1_ph(1.0);
- let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm512_maskz_reduce_round_ph() {
+ let a = _mm512_set1_ph(1.25);
+ let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
0b01010101010101010101010101010101,
a,
- b,
);
let e = _mm512_set_ph(
- 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
- 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
+ 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
);
assert_eq_m512h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_min_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_min_sh(a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_reduce_sh() {
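+ // The scalar form reduces only element 0 of b (1.25 -> 0.25) and copies
+ // elements 1..=7 from a.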
+ let a = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(1.25, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
+ let e = _mm_setr_ph(0.25, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_min_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_min_sh(src, 0, a, b);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_mask_reduce_sh() {
+ let a = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(1.25, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(2.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_min_sh(src, 1, a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b);
+ let e = _mm_setr_ph(0.25, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_min_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_maskz_min_sh(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_maskz_reduce_sh() {
+ let a = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(1.25, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_maskz_min_sh(1, a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b);
+ let e = _mm_setr_ph(0.25, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_min_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ unsafe fn test_mm_reduce_round_sh() {
+ let a = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(1.25, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
+ let e = _mm_setr_ph(0.25, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_mask_min_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
- let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
- let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ unsafe fn test_mm_mask_reduce_round_sh() {
+ let a = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(1.25, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ let src = _mm_setr_ph(2.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
+ let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
src, 0, a, b,
);
- let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
- let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
src, 1, a, b,
);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let e = _mm_setr_ph(0.25, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
- unsafe fn test_mm_maskz_min_round_sh() {
- let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
- let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
+ unsafe fn test_mm_maskz_reduce_round_sh() {
+ let a = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let b = _mm_setr_ph(1.25, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
let r =
- _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
- let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b);
+ let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
let r =
- _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
- let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b);
+ let e = _mm_setr_ph(0.25, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
assert_eq_m128h(r, e);
}
}