From 066eba0c64393c1f659187b57d66dee00678584b Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 3 Sep 2018 16:34:22 +0200 Subject: [PATCH 01/31] remove cmsis module; add acle module ACLE (ARM C Language Extensions) is more general (supports ARMv4 to ARMv8) than CMSIS (ARMv7-M and ARMv7-R) --- crates/core_arch/src/aarch64/mod.rs | 2 + crates/core_arch/src/acle/barrier/common.rs | 14 + crates/core_arch/src/acle/barrier/mod.rs | 89 +++++ .../core_arch/src/acle/barrier/not_mclass.rs | 43 +++ crates/core_arch/src/acle/barrier/v8.rs | 23 ++ crates/core_arch/src/acle/dsp.rs | 24 ++ crates/core_arch/src/acle/hints.rs | 115 ++++++ crates/core_arch/src/acle/mod.rs | 134 +++++++ .../core_arch/src/acle/registers/aarch32.rs | 4 + crates/core_arch/src/acle/registers/mod.rs | 121 +++++++ crates/core_arch/src/acle/registers/v6m.rs | 39 +++ crates/core_arch/src/acle/registers/v7m.rs | 17 + crates/core_arch/src/acle/simd32.rs | 60 ++++ crates/core_arch/src/arm/cmsis.rs | 330 ------------------ crates/core_arch/src/arm/mod.rs | 8 +- crates/core_arch/src/mod.rs | 3 + 16 files changed, 691 insertions(+), 335 deletions(-) create mode 100644 crates/core_arch/src/acle/barrier/common.rs create mode 100644 crates/core_arch/src/acle/barrier/mod.rs create mode 100644 crates/core_arch/src/acle/barrier/not_mclass.rs create mode 100644 crates/core_arch/src/acle/barrier/v8.rs create mode 100644 crates/core_arch/src/acle/dsp.rs create mode 100644 crates/core_arch/src/acle/hints.rs create mode 100644 crates/core_arch/src/acle/mod.rs create mode 100644 crates/core_arch/src/acle/registers/aarch32.rs create mode 100644 crates/core_arch/src/acle/registers/mod.rs create mode 100644 crates/core_arch/src/acle/registers/v6m.rs create mode 100644 crates/core_arch/src/acle/registers/v7m.rs create mode 100644 crates/core_arch/src/acle/simd32.rs delete mode 100644 crates/core_arch/src/arm/cmsis.rs diff --git a/crates/core_arch/src/aarch64/mod.rs b/crates/core_arch/src/aarch64/mod.rs index 
d573e2c0b8..4821438e9f 100644 --- a/crates/core_arch/src/aarch64/mod.rs +++ b/crates/core_arch/src/aarch64/mod.rs @@ -18,6 +18,8 @@ pub use self::crypto::*; mod crc; pub use self::crc::*; +pub use super::acle::*; + #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/crates/core_arch/src/acle/barrier/common.rs b/crates/core_arch/src/acle/barrier/common.rs new file mode 100644 index 0000000000..a1d8c93e8c --- /dev/null +++ b/crates/core_arch/src/acle/barrier/common.rs @@ -0,0 +1,14 @@ +//! Access types available on all architectures + +/// Full system is the required shareability domain, reads and writes are the +/// required access types +pub struct SY; + +dmb_dsb!(SY); + +impl super::super::sealed::Isb for SY { + #[inline(always)] + unsafe fn __isb(&self) { + asm!("ISB SY" : : : "memory" : "volatile") + } +} diff --git a/crates/core_arch/src/acle/barrier/mod.rs b/crates/core_arch/src/acle/barrier/mod.rs new file mode 100644 index 0000000000..3fbf6899ff --- /dev/null +++ b/crates/core_arch/src/acle/barrier/mod.rs @@ -0,0 +1,89 @@ +// Reference: Section 7.4 "Hints" of ACLE + +macro_rules! dmb_dsb { + ($A:ident) => { + impl super::super::sealed::Dmb for $A { + #[inline(always)] + unsafe fn __dmb(&self) { + asm!(concat!("DMB ", stringify!($A)) : : : "memory" : "volatile") + } + } + + impl super::super::sealed::Dsb for $A { + #[inline(always)] + unsafe fn __dsb(&self) { + asm!(concat!("DSB ", stringify!($A)) : : : "memory" : "volatile") + } + } + }; +} + +mod common; + +pub use self::common::*; + +#[cfg(not(target_feature = "mclass"))] +mod not_mclass; + +#[cfg(not(target_feature = "mclass"))] +pub use self::not_mclass::*; + +#[cfg(target_arch = "aarch64")] +mod v8; + +#[cfg(target_arch = "aarch64")] +pub use self::v8::*; + +/// Generates a DMB (data memory barrier) instruction or equivalent CP15 instruction. +/// +/// DMB ensures the observed ordering of memory accesses. 
Memory accesses of the specified type +/// issued before the DMB are guaranteed to be observed (in the specified scope) before memory +/// accesses issued after the DMB. +/// +/// For example, DMB should be used between storing data, and updating a flag variable that makes +/// that data available to another core. +/// +/// The __dmb() intrinsic also acts as a compiler memory barrier of the appropriate type. +#[inline(always)] +pub unsafe fn __dmb(arg: A) +where + A: super::sealed::Dmb, +{ + arg.__dmb() +} + +/// Generates a DSB (data synchronization barrier) instruction or equivalent CP15 instruction. +/// +/// DSB ensures the completion of memory accesses. A DSB behaves as the equivalent DMB and has +/// additional properties. After a DSB instruction completes, all memory accesses of the specified +/// type issued before the DSB are guaranteed to have completed. +/// +/// The __dsb() intrinsic also acts as a compiler memory barrier of the appropriate type. +#[inline(always)] +pub unsafe fn __dsb(arg: A) +where + A: super::sealed::Dsb, +{ + arg.__dsb() +} + +/// Generates an ISB (instruction synchronization barrier) instruction or equivalent CP15 +/// instruction. +/// +/// This instruction flushes the processor pipeline fetch buffers, so that following instructions +/// are fetched from cache or memory. +/// +/// An ISB is needed after some system maintenance operations. An ISB is also needed before +/// transferring control to code that has been loaded or modified in memory, for example by an +/// overlay mechanism or just-in-time code generator. (Note that if instruction and data caches are +/// separate, privileged cache maintenance operations would be needed in order to unify the caches.) +/// +/// The only supported argument for the __isb() intrinsic is 15, corresponding to the SY (full +/// system) scope of the ISB instruction. 
+#[inline(always)] +pub unsafe fn __isb(arg: A) +where + A: super::sealed::Isb, +{ + arg.__isb() +} diff --git a/crates/core_arch/src/acle/barrier/not_mclass.rs b/crates/core_arch/src/acle/barrier/not_mclass.rs new file mode 100644 index 0000000000..385e1d5289 --- /dev/null +++ b/crates/core_arch/src/acle/barrier/not_mclass.rs @@ -0,0 +1,43 @@ +//! Access types available on v7 and v8 but not on v7(E)-M or v8-M + +/// Full system is the required shareability domain, writes are the required +/// access type +pub struct ST; + +dmb_dsb!(ST); + +/// Inner Shareable is the required shareability domain, reads and writes are +/// the required access types +pub struct ISH; + +dmb_dsb!(ISH); + +/// Inner Shareable is the required shareability domain, writes are the required +/// access type +pub struct ISHST; + +dmb_dsb!(ISHST); + +/// Non-shareable is the required shareability domain, reads and writes are the +/// required access types +pub struct NSH; + +dmb_dsb!(NSH); + +/// Non-shareable is the required shareability domain, writes are the required +/// access type +pub struct NSHST; + +dmb_dsb!(NSHST); + +/// Outer Shareable is the required shareability domain, reads and writes are +/// the required access types +pub struct OSH; + +dmb_dsb!(OSH); + +/// Outer Shareable is the required shareability domain, writes are the required +/// access type +pub struct OSHST; + +dmb_dsb!(OSHST); diff --git a/crates/core_arch/src/acle/barrier/v8.rs b/crates/core_arch/src/acle/barrier/v8.rs new file mode 100644 index 0000000000..2951a5a670 --- /dev/null +++ b/crates/core_arch/src/acle/barrier/v8.rs @@ -0,0 +1,23 @@ +/// Full system is the required shareability domain, reads are the required +/// access type +pub struct LD; + +dmb_dsb!(LD); + +/// Inner Shareable is the required shareability domain, reads are the required +/// access type +pub struct ISHLD; + +dmb_dsb!(ISHLD); + +/// Non-shareable is the required shareability domain, reads are the required +/// access type +pub struct 
NSHLD; + +dmb_dsb!(NSHLD); + +/// Outer Shareable is the required shareability domain, reads are the required +/// access type +pub struct OSHLD; + +dmb_dsb!(OSHLD); diff --git a/crates/core_arch/src/acle/dsp.rs b/crates/core_arch/src/acle/dsp.rs new file mode 100644 index 0000000000..4029e7aaa3 --- /dev/null +++ b/crates/core_arch/src/acle/dsp.rs @@ -0,0 +1,24 @@ +//! # References: +//! +//! - Section 8.3 "16-bit multiplications" +//! - Section 8.4 "Saturating intrinsics" +//! +//! Intrinsics that could live here: +//! +//! - __smulbb +//! - __smulbt +//! - __smultb +//! - __smultt +//! - __smulwb +//! - __smulwt +//! - __ssat +//! - __usat +//! - __qadd +//! - __qsub +//! - __qdbl +//! - __smlabb +//! - __smlabt +//! - __smlatb +//! - __smlatt +//! - __smlawb +//! - __smlawt diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/acle/hints.rs new file mode 100644 index 0000000000..1b77e5e64c --- /dev/null +++ b/crates/core_arch/src/acle/hints.rs @@ -0,0 +1,115 @@ +// # References +// +// - Section 7.4 "Hints" of ACLE +// - Section 7.7 "NOP" of ACLE + +/// Generates a WFI (wait for interrupt) hint instruction, or nothing. +/// +/// The WFI instruction allows (but does not require) the processor to enter a +/// low-power state until one of a number of asynchronous events occurs. +// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M +// LLVM says "instruction requires: armv6k" +#[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] +#[inline(always)] +pub unsafe fn __wfi() { + asm!("WFI" : : : : "volatile") +} + +/// Generates a WFE (wait for event) hint instruction, or nothing. +/// +/// The WFE instruction allows (but does not require) the processor to enter a +/// low-power state until some event occurs such as a SEV being issued by +/// another processor. 
+// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M +// LLVM says "instruction requires: armv6k" +#[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] +#[inline(always)] +pub unsafe fn __wfe() { + asm!("WFE" : : : : "volatile") +} + +/// Generates a SEV (send a global event) hint instruction. +/// +/// This causes an event to be signaled to all processors in a multiprocessor +/// system. It is a NOP on a uniprocessor system. +// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M, 7-M +// LLVM says "instruction requires: armv6k" +#[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] +#[inline(always)] +pub unsafe fn __sev() { + asm!("SEV" : : : : "volatile") +} + +/// Generates a send a local event hint instruction. +/// +/// This causes an event to be signaled to only the processor executing this +/// instruction. In a multiprocessor system, it is not required to affect the +/// other processors. +// LLVM says "instruction requires: armv8" +#[cfg(target_arch = "aarch64")] +#[inline(always)] +pub unsafe fn __sevl() { + asm!("SEVL" : : : : "volatile") +} + +/// Generates a YIELD hint instruction. +/// +/// This enables multithreading software to indicate to the hardware that it is +/// performing a task, for example a spin-lock, that could be swapped out to +/// improve overall system performance. +// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M +// LLVM says "instruction requires: armv6k" +#[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] +#[inline(always)] +pub unsafe fn __yield() { + asm!("YIELD" : : : : "volatile") +} + +/// Generates a DBG instruction. +/// +/// This provides a hint to debugging and related systems. The argument must be +/// a constant integer from 0 to 15 inclusive. See implementation documentation +/// for the effect (if any) of this instruction and the meaning of the +/// argument. This is available only when compliling for AArch32. 
+// Section 10.1 of ACLE says that the supported arches are: 7, 7-M +// LLVM says "instruction requires: thumb2" OR "instruction requires: armv7" +#[cfg(target_feature = "v6t2")] +#[inline(always)] +#[rustc_args_required_const(0)] +pub unsafe fn __dbg(imm4: u32) { + macro_rules! call { + ($imm4:expr) => { + asm!(concat!("DBG ", stringify!($imm4)) : : : : "volatile") + } + } + + match imm4 & 0b1111 { + 0 => call!(0), + 1 => call!(1), + 2 => call!(2), + 3 => call!(3), + 4 => call!(4), + 5 => call!(5), + 6 => call!(6), + 7 => call!(7), + 8 => call!(8), + 9 => call!(9), + 10 => call!(10), + 11 => call!(11), + 12 => call!(12), + 13 => call!(13), + 14 => call!(14), + _ => call!(15), + } +} + +/// Generates an unspecified no-op instruction. +/// +/// Note that not all architectures provide a distinguished NOP instruction. On +/// those that do, it is unspecified whether this intrinsic generates it or +/// another instruction. It is not guaranteed that inserting this instruction +/// will increase execution time. +#[inline(always)] +pub unsafe fn __nop() { + asm!("NOP" : : : : "volatile") +} diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs new file mode 100644 index 0000000000..ec7dabd2b3 --- /dev/null +++ b/crates/core_arch/src/acle/mod.rs @@ -0,0 +1,134 @@ +//! ARM C Language Extensions (ACLE) +//! +//! # Developer notes +//! +//! Below is a list of built-in targets that are representative of the different ARM +//! architectures; the list includes the `target_feature`s they possess. +//! +//! - `armv4t-unknown-linux-gnueabi` - **ARMv4** - `+v4t` +//! - `armv5te-unknown-linux-gnueabi` - **ARMv5TE** - `+v4t +v5te` +//! - `arm-unknown-linux-gnueabi` - **ARMv6** - `+v4t +v5te +v6` +//! - `thumbv6m-none-eabi` - **ARMv6-M** - `+v4t +v5te +v6 +thumb-mode +mclass` +//! - `armv7-unknown-linux-gnueabihf` - **ARMv7-A** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +aclass` +//! 
- `armv7r-none-eabi` - **ARMv7-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +rclass` +//! - `thumbv7m-none-eabi` - **ARMv7-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass` +//! - `thumbv7em-none-eabi` - **ARMv7E-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +thumb-mode +mclass` +//! - `aarch64-unknown-linux-gnu` - **ARMv8-A (AArch64)** - `+fp +neon` +//! +//! Section 10.1 of ACLE says: +//! +//! - "In the sequence of Arm architectures { v5, v5TE, v6, v6T2, v7 } each architecture includes +//! its predecessor instruction set." +//! +//! - "In the sequence of Thumb-only architectures { v6-M, v7-M, v7E-M } each architecture includes +//! its predecessor instruction set." +//! +//! From that info and from looking at how LLVM features work (using custom targets) we can identify +//! features that are subsets of others: +//! +//! Legend: `a < b` reads as "`a` is a subset of `b`"; this means that if `b` is enabled then `a` is +//! enabled as well. +//! +//! - `v4t < v5te < v6 < v6k < v6t2 < v7 < v8` +//! - `v6 < v8m < v6t2` +//! - `v7 < v8m.main` +//! +//! # References +//! +//! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest) + +// Supported arches: 8, 7, 6-M. See Section 10.1 of ACLE (e.g. DMB) +// But this is further refined within the module +#[cfg(any( + // v8 + target_arch = "aarch64", + // v7 + target_feature = "v7", + // v6-M + target_feature = "mclass" +))] +mod barrier; + +#[cfg(any( + target_arch = "aarch64", + target_feature = "v7", + target_feature = "mclass" +))] +pub use self::barrier::*; + +mod hints; + +pub use self::hints::*; + +mod registers; + +pub use self::registers::*; + +// Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. 
QADD) +// But we also exclude the A profile because DSP is deprecated on that profile as of ACLE 2.0 (see +// section 5.4.7) +#[cfg(any( + // >= v5TE but excludes v7-A + all(target_feature = "v5te", not(target_feature = "mclass"), not(target_feature = "aclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), +))] +mod dsp; + +#[cfg(any( + all( + target_feature = "v5te", + not(target_feature = "mclass"), + not(target_feature = "aclass") + ), + all(target_feature = "mclass", target_feature = "dsp"), +))] +pub use dsp::*; + +// Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says +// Section 5.4.9 of ACLE. +#[cfg(any( + // v7-R + target_feature = "rclass", + // v7E-M + all(target_feature = "mclass", target_feature = "dsp") +))] +mod simd32; + +mod sealed { + pub trait Dmb { + unsafe fn __dmb(&self); + } + + pub trait Dsb { + unsafe fn __dsb(&self); + } + + pub trait Isb { + unsafe fn __isb(&self); + } + + pub trait Rsr { + unsafe fn __rsr(&self) -> u32; + } + + pub trait Rsr64 { + unsafe fn __rsr64(&self) -> u64; + } + + pub trait Rsrp { + unsafe fn __rsrp(&self) -> *const u8; + } + + pub trait Wsr { + unsafe fn __wsr(&self, value: u32); + } + + pub trait Wsr64 { + unsafe fn __wsr64(&self, value: u64); + } + + pub trait Wsrp { + unsafe fn __wsrp(&self, value: *const u8); + } +} diff --git a/crates/core_arch/src/acle/registers/aarch32.rs b/crates/core_arch/src/acle/registers/aarch32.rs new file mode 100644 index 0000000000..f59af5d3ae --- /dev/null +++ b/crates/core_arch/src/acle/registers/aarch32.rs @@ -0,0 +1,4 @@ +/// Application Program Status Register +pub struct APSR; + +rsr!(APSR); diff --git a/crates/core_arch/src/acle/registers/mod.rs b/crates/core_arch/src/acle/registers/mod.rs new file mode 100644 index 0000000000..73fcc2c7b0 --- /dev/null +++ b/crates/core_arch/src/acle/registers/mod.rs @@ -0,0 +1,121 @@ +#[allow(unused_macros)] +macro_rules! 
rsr { + ($R:ident) => { + impl super::super::sealed::Rsr for $R { + unsafe fn __rsr(&self) -> u32 { + let r: u32; + asm!(concat!("mrs $0,", stringify!($R)) : "=r"(r) : : : "volatile"); + r + } + } + }; +} + +#[allow(unused_macros)] +macro_rules! rsrp { + ($R:ident) => { + impl super::super::sealed::Rsrp for $R { + unsafe fn __rsrp(&self) -> *const u8 { + let r: *const u8; + asm!(concat!("mrs $0,", stringify!($R)) : "=r"(r) : : : "volatile"); + r + } + } + }; +} + +#[allow(unused_macros)] +macro_rules! wsr { + ($R:ident) => { + impl super::super::sealed::Wsr for $R { + unsafe fn __wsr(&self, value: u32) { + asm!(concat!("msr ", stringify!($R), ",$0") : : "r"(value) : : "volatile"); + } + } + }; +} + +#[allow(unused_macros)] +macro_rules! wsrp { + ($R:ident) => { + impl super::super::sealed::Wsrp for $R { + unsafe fn __wsrp(&self, value: *const u8) { + asm!(concat!("msr ", stringify!($R), ",$0") : : "r"(value) : : "volatile"); + } + } + }; +} + +#[cfg(target_feature = "mclass")] +mod v6m; + +#[cfg(target_feature = "mclass")] +pub use self::v6m::*; + +#[cfg(all(target_feature = "v7", target_feature = "mclass"))] +mod v7m; + +#[cfg(all(target_feature = "v7", target_feature = "mclass"))] +pub use self::v7m::*; + +#[cfg(not(target_arch = "aarch64"))] +mod aarch32; + +#[cfg(not(target_arch = "aarch64"))] +pub use self::aarch32::*; + +/// Reads a 32-bit system register +#[inline(always)] +pub unsafe fn __rsr(reg: R) -> u32 +where + R: super::sealed::Rsr, +{ + reg.__rsr() +} + +/// Reads a 64-bit system register +#[cfg(target_arch = "aarch64")] +#[inline(always)] +pub unsafe fn __rsr64(reg: R) -> u64 +where + R: super::sealed::Rsr64, +{ + reg.__rsr64() +} + +/// Reads a system register containing an address +#[inline(always)] +pub unsafe fn __rsrp(reg: R) -> *const u8 +where + R: super::sealed::Rsrp, +{ + reg.__rsrp() +} + +/// Writes a 32-bit system register +#[inline(always)] +pub unsafe fn __wsr(reg: R, value: u32) +where + R: super::sealed::Wsr, +{ + reg.__wsr(value) +} 
+ +/// Writes a 64-bit system register +#[cfg(target_arch = "aarch64")] +#[inline(always)] +pub unsafe fn __wsr64(reg: R, value: u64) +where + R: super::sealed::Wsr64, +{ + reg.__wsr64(value) +} + +/// Writes a system register containing an address +#[inline(always)] +pub unsafe fn __wsrp(reg: R, value: *const u8) +where + R: super::sealed::Wsrp, +{ + reg.__wsrp(value) +} diff --git a/crates/core_arch/src/acle/registers/v6m.rs b/crates/core_arch/src/acle/registers/v6m.rs new file mode 100644 index 0000000000..7acc63b6d1 --- /dev/null +++ b/crates/core_arch/src/acle/registers/v6m.rs @@ -0,0 +1,39 @@ +/// CONTROL register +pub struct CONTROL; + +rsr!(CONTROL); +wsr!(CONTROL); + +/// Execution Program Status Register +pub struct EPSR; + +rsr!(EPSR); + +/// Interrupt Program Status Register +pub struct IPSR; + +rsr!(IPSR); + +/// Main Stack Pointer +pub struct MSP; + +rsrp!(MSP); +wsrp!(MSP); + +/// Priority Mask Register +pub struct PRIMASK; + +rsr!(PRIMASK); +wsr!(PRIMASK); + +/// Process Stack Pointer +pub struct PSP; + +rsrp!(PSP); +wsrp!(PSP); + +/// Program Status Register +#[allow(non_camel_case_types)] +pub struct xPSR; + +rsr!(xPSR); diff --git a/crates/core_arch/src/acle/registers/v7m.rs b/crates/core_arch/src/acle/registers/v7m.rs new file mode 100644 index 0000000000..d1b1d474f1 --- /dev/null +++ b/crates/core_arch/src/acle/registers/v7m.rs @@ -0,0 +1,17 @@ +/// Base Priority Mask Register +pub struct BASEPRI; + +rsr!(BASEPRI); +wsr!(BASEPRI); + +/// Base Priority Mask Register (conditional write) +#[allow(non_camel_case_types)] +pub struct BASEPRI_MAX; + +wsr!(BASEPRI_MAX); + +/// Fault Mask Register +pub struct FAULTMASK; + +rsr!(FAULTMASK); +wsr!(FAULTMASK); diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/acle/simd32.rs new file mode 100644 index 0000000000..6b28ec88dc --- /dev/null +++ b/crates/core_arch/src/acle/simd32.rs @@ -0,0 +1,60 @@ +//! # References +//! +//! - Section 8.5 "32-bit SIMD intrinsics" of ACLE +//! +//! 
Intrinsics that could live here +//! +//! - __ssat16 +//! - __usat16 +//! - __sxtab16 +//! - __sxtb16 +//! - __uxtab16 +//! - __uxtb16 +//! - __qsub8 +//! - __sadd8 +//! - __shadd8 +//! - __shsub8 +//! - __ssub8 +//! - __uadd8 +//! - __uhadd8 +//! - __uhsub8 +//! - __uqadd8 +//! - __uqsub8 +//! - __usub8 +//! - __usad8 +//! - __usada8 +//! - __qadd16 +//! - __qasx +//! - __qsub16 +//! - __sadd16 +//! - __sasx +//! - __shadd16 +//! - __shasx +//! - __shsax +//! - __shsub16 +//! - __ssax +//! - __ssub16 +//! - __uadd16 +//! - __uasx +//! - __uhadd16 +//! - __uhasx +//! - __uhsax +//! - __uhsub16 +//! - __uqadd16 +//! - __uqasx +//! - __uqsax +//! - __uqsub16 +//! - __usax +//! - __usub16 +//! - __smlad +//! - __smladx +//! - __smlald +//! - __smlaldx +//! - __smlsd +//! - __smlsdx +//! - __smlsld +//! - __smlsldx +//! - __smuad +//! - __smuadx +//! - __smusd +//! - __smusdx diff --git a/crates/core_arch/src/arm/cmsis.rs b/crates/core_arch/src/arm/cmsis.rs deleted file mode 100644 index bc8509d3e8..0000000000 --- a/crates/core_arch/src/arm/cmsis.rs +++ /dev/null @@ -1,330 +0,0 @@ -//! CMSIS: Cortex Microcontroller Software Interface Standard -//! -//! The version 5 of the standard can be found at: -//! -//! http://arm-software.github.io/CMSIS_5/Core/html/index.html -//! -//! The API reference of the standard can be found at: -//! -//! - Core function access -- http://arm-software.github.io/CMSIS_5/Core/html/group__Core__Register__gr.html -//! - Intrinsic functions for CPU instructions -- http://arm-software.github.io/CMSIS_5/Core/html/group__intrinsic__CPU__gr.html -//! -//! The reference C implementation used as the base of this Rust port can be -//! found at -//! -//! https://github.com/ARM-software/CMSIS_5/blob/5.3.0/CMSIS/Core/Include/cmsis_gcc.h - -#![allow(non_snake_case)] - -/* Core function access */ - -/// Enable IRQ Interrupts -/// -/// Enables IRQ interrupts by clearing the I-bit in the CPSR. Can only be -/// executed in Privileged modes. 
-#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(cpsie))] -pub unsafe fn __enable_irq() { - asm!("cpsie i" : : : "memory" : "volatile"); -} - -/// Disable IRQ Interrupts -/// -/// Disables IRQ interrupts by setting the I-bit in the CPSR. Can only be -/// executed in Privileged modes. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(cpsid))] -pub unsafe fn __disable_irq() { - asm!("cpsid i" : : : "memory" : "volatile"); -} - -/// Get Control Register -/// -/// Returns the content of the Control Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_CONTROL() -> u32 { - let result: u32; - asm!("mrs $0, CONTROL" : "=r"(result) : : : "volatile"); - result -} - -/// Set Control Register -/// -/// Writes the given value to the Control Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(msr))] -pub unsafe fn __set_CONTROL(control: u32) { - asm!("msr CONTROL, $0" : : "r"(control) : "memory" : "volatile"); -} - -/// Get IPSR Register -/// -/// Returns the content of the IPSR Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_IPSR() -> u32 { - let result: u32; - asm!("mrs $0, IPSR" : "=r"(result) : : : "volatile"); - result -} - -/// Get APSR Register -/// -/// Returns the content of the APSR Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_APSR() -> u32 { - let result: u32; - asm!("mrs $0, APSR" : "=r"(result) : : : "volatile"); - result -} - -/// Get xPSR Register -/// -/// Returns the content of the xPSR Register. 
-#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_xPSR() -> u32 { - let result: u32; - asm!("mrs $0, XPSR" : "=r"(result) : : : "volatile"); - result -} - -/// Get Process Stack Pointer -/// -/// Returns the current value of the Process Stack Pointer (PSP). -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_PSP() -> u32 { - let result: u32; - asm!("mrs $0, PSP" : "=r"(result) : : : "volatile"); - result -} - -/// Set Process Stack Pointer -/// -/// Assigns the given value to the Process Stack Pointer (PSP). -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(msr))] -pub unsafe fn __set_PSP(top_of_proc_stack: u32) { - asm!("msr PSP, $0" : : "r"(top_of_proc_stack) : : "volatile"); -} - -/// Get Main Stack Pointer -/// -/// Returns the current value of the Main Stack Pointer (MSP). -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_MSP() -> u32 { - let result: u32; - asm!("mrs $0, MSP" : "=r"(result) : : : "volatile"); - result -} - -/// Set Main Stack Pointer -/// -/// Assigns the given value to the Main Stack Pointer (MSP). -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(msr))] -pub unsafe fn __set_MSP(top_of_main_stack: u32) { - asm!("msr MSP, $0" : : "r"(top_of_main_stack) : : "volatile"); -} - -/// Get Priority Mask -/// -/// Returns the current state of the priority mask bit from the Priority Mask -/// Register. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(mrs))] -pub unsafe fn __get_PRIMASK() -> u32 { - let result: u32; - asm!("mrs $0, PRIMASK" : "=r"(result) : : "memory" : "volatile"); - result -} - -/// Set Priority Mask -/// -/// Assigns the given value to the Priority Mask Register. 
-#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(msr))] -pub unsafe fn __set_PRIMASK(pri_mask: u32) { - asm!("msr PRIMASK, $0" : : "r"(pri_mask) : : "volatile"); -} - -#[cfg(any(target_feature = "v7", dox))] -mod v7 { - /// Enable FIQ - /// - /// Enables FIQ interrupts by clearing the F-bit in the CPSR. Can only be - /// executed in Privileged modes. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(cpsie))] - pub unsafe fn __enable_fault_irq() { - asm!("cpsie f" : : : "memory" : "volatile"); - } - - /// Disable FIQ - /// - /// Disables FIQ interrupts by setting the F-bit in the CPSR. Can only be - /// executed in Privileged modes. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(cpsid))] - pub unsafe fn __disable_fault_irq() { - asm!("cpsid f" : : : "memory" : "volatile"); - } - - /// Get Base Priority - /// - /// Returns the current value of the Base Priority register. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(mrs))] - pub unsafe fn __get_BASEPRI() -> u32 { - let result: u32; - asm!("mrs $0, BASEPRI" : "=r"(result) : : : "volatile"); - result - } - - /// Set Base Priority - /// - /// Assigns the given value to the Base Priority register. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(msr))] - pub unsafe fn __set_BASEPRI(base_pri: u32) { - asm!("msr BASEPRI, $0" : : "r"(base_pri) : "memory" : "volatile"); - } - - /// Set Base Priority with condition - /// - /// Assigns the given value to the Base Priority register only if BASEPRI - /// masking is disabled, or the new value increases the BASEPRI - /// priority level. 
- #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(mrs))] - pub unsafe fn __set_BASEPRI_MAX(base_pri: u32) { - asm!("msr BASEPRI_MAX, $0" : : "r"(base_pri) : "memory" : "volatile"); - } - - /// Get Fault Mask - /// - /// Returns the current value of the Fault Mask register. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(mrs))] - pub unsafe fn __get_FAULTMASK() -> u32 { - let result: u32; - asm!("mrs $0, FAULTMASK" : "=r"(result) : : : "volatile"); - result - } - - /// Set Fault Mask - /// - /// Assigns the given value to the Fault Mask register. - #[inline] - #[target_feature(enable = "mclass")] - #[cfg_attr(test, assert_instr(msr))] - pub unsafe fn __set_FAULTMASK(fault_mask: u32) { - asm!("msr FAULTMASK, $0" : : "r"(fault_mask) : "memory" : "volatile"); - } -} - -#[cfg(any(target_feature = "v7", dox))] -pub use self::v7::*; - -/* Core instruction access */ - -/// No Operation -/// -/// No Operation does nothing. This instruction can be used for code alignment -/// purposes. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn __NOP() { - asm!("nop" : : : : "volatile"); -} - -/// Wait For Interrupt -/// -/// Wait For Interrupt is a hint instruction that suspends execution until one -/// of a number of events occurs. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(wfi))] -pub unsafe fn __WFI() { - asm!("wfi" : : : : "volatile"); -} - -/// Wait For Event -/// -/// Wait For Event is a hint instruction that permits the processor to enter a -/// low-power state until one of a number of events occurs. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(wfe))] -pub unsafe fn __WFE() { - asm!("wfe" : : : : "volatile"); -} - -/// Send Event -/// -/// Send Event is a hint instruction. It causes an event to be signaled to the -/// CPU. 
-#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(sev))] -pub unsafe fn __SEV() { - asm!("sev" : : : : "volatile"); -} - -/// Instruction Synchronization Barrier -/// -/// Instruction Synchronization Barrier flushes the pipeline in the processor, -/// so that all instructions following the ISB are fetched from cache or -/// memory, after the instruction has been completed. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(isb))] -pub unsafe fn __ISB() { - asm!("isb 0xF" : : : "memory" : "volatile"); -} - -/// Data Synchronization Barrier -/// -/// Acts as a special kind of Data Memory Barrier. It completes when all -/// explicit memory accesses before this instruction complete. -#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(dsb))] -pub unsafe fn __DSB() { - asm!("dsb 0xF" : : : "memory" : "volatile"); -} - -/// Data Memory Barrier -/// -/// Ensures the apparent order of the explicit memory operations before and -/// after the instruction, without ensuring their completion. 
-#[inline] -#[target_feature(enable = "mclass")] -#[cfg_attr(test, assert_instr(dmb))] -pub unsafe fn __DMB() { - asm!("dmb 0xF" : : : "memory" : "volatile"); -} diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 30ff991f8d..dd69b11457 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -11,11 +11,6 @@ mod armclang; pub use self::armclang::*; -#[cfg(any(target_feature = "mclass", dox))] -mod cmsis; -#[cfg(any(target_feature = "mclass", dox))] -pub use self::cmsis::*; - mod v6; pub use self::v6::*; @@ -24,6 +19,7 @@ mod v7; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] pub use self::v7::*; +// TODO move into the `acle::{dsp,simd32}` modules #[cfg(any(all(target_feature = "v7", not(target_feature = "mclass")), dox))] mod dsp; #[cfg(any(all(target_feature = "v7", not(target_feature = "mclass")), dox))] @@ -44,6 +40,8 @@ mod neon; ))] pub use self::neon::*; +pub use super::acle::*; + #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs index 9705e091ca..1ca811ee75 100644 --- a/crates/core_arch/src/mod.rs +++ b/crates/core_arch/src/mod.rs @@ -3,6 +3,9 @@ #[macro_use] mod macros; +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +mod acle; + mod simd; #[cfg_attr( From 342786c3e6c9a36a32514ac6724e10f545a30924 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 3 Sep 2018 19:20:33 +0200 Subject: [PATCH 02/31] fix relative import --- crates/core_arch/src/acle/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index ec7dabd2b3..3d1b0324d5 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -83,7 +83,7 @@ mod dsp; ), all(target_feature = "mclass", target_feature = "dsp"), ))] -pub use dsp::*; +pub use self::dsp::*; // Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R 
profiles, says // Section 5.4.9 of ACLE. From 503f9f7cb28b03cd500be77a50803984747e9409 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 3 Sep 2018 19:21:14 +0200 Subject: [PATCH 03/31] add missing import --- crates/core_arch/src/acle/mod.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 3d1b0324d5..843115a366 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -95,6 +95,12 @@ pub use self::dsp::*; ))] mod simd32; +#[cfg(any( + target_feature = "rclass", + all(target_feature = "mclass", target_feature = "dsp") +))] +pub use self::simd32::*; + mod sealed { pub trait Dmb { unsafe fn __dmb(&self); From b077652204e355e0cc741b0c4005539adbb40b90 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 13 Feb 2019 17:27:48 +0100 Subject: [PATCH 04/31] acle::hints: use llvm.{arm,aarch64.hint} addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r255250217 --- crates/core_arch/src/acle/hints.rs | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/acle/hints.rs index 1b77e5e64c..78d7834073 100644 --- a/crates/core_arch/src/acle/hints.rs +++ b/crates/core_arch/src/acle/hints.rs @@ -12,7 +12,7 @@ #[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __wfi() { - asm!("WFI" : : : : "volatile") + hint(HINT_WFI); } /// Generates a WFE (wait for event) hint instruction, or nothing. @@ -25,7 +25,7 @@ pub unsafe fn __wfi() { #[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __wfe() { - asm!("WFE" : : : : "volatile") + hint(HINT_WFE); } /// Generates a SEV (send a global event) hint instruction. 
@@ -37,7 +37,7 @@ pub unsafe fn __wfe() { #[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __sev() { - asm!("SEV" : : : : "volatile") + hint(HINT_SEV); } /// Generates a send a local event hint instruction. @@ -49,7 +49,7 @@ pub unsafe fn __sev() { #[cfg(target_arch = "aarch64")] #[inline(always)] pub unsafe fn __sevl() { - asm!("SEVL" : : : : "volatile") + hint(HINT_SEVL); } /// Generates a YIELD hint instruction. @@ -62,7 +62,7 @@ pub unsafe fn __sevl() { #[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __yield() { - asm!("YIELD" : : : : "volatile") + hint(HINT_YIELD); } /// Generates a DBG instruction. @@ -111,5 +111,19 @@ pub unsafe fn __dbg(imm4: u32) { /// will increase execution time. #[inline(always)] pub unsafe fn __nop() { - asm!("NOP" : : : : "volatile") + hint(HINT_NOP); } + +extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.hint")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")] + fn hint(_: i32); +} + +// from LLVM 7.0.1's lib/Target/ARM/{ARMInstrThumb,ARMInstrInfo,ARMInstrThumb2}.td +const HINT_NOP: i32 = 0; +const HINT_YIELD: i32 = 1; +const HINT_WFE: i32 = 2; +const HINT_WFI: i32 = 3; +const HINT_SEV: i32 = 4; +const HINT_SEVL: i32 = 5; From b6672dd6883260c81a8f8bbe32598e9c6dc5cdb5 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 13 Feb 2019 17:37:46 +0100 Subject: [PATCH 05/31] acle/hints: __dbg requires 'v7' addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r255250415 --- crates/core_arch/src/acle/hints.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/acle/hints.rs index 78d7834073..52d47cc9bf 100644 --- a/crates/core_arch/src/acle/hints.rs +++ b/crates/core_arch/src/acle/hints.rs @@ -72,8 +72,11 @@ pub unsafe fn __yield() { /// for the effect (if any) of this instruction and the meaning of the /// 
argument. This is available only when compliling for AArch32. // Section 10.1 of ACLE says that the supported arches are: 7, 7-M -// LLVM says "instruction requires: thumb2" OR "instruction requires: armv7" -#[cfg(target_feature = "v6t2")] +// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and +// executes as a NOP instruction in ARMv6K and ARMv6T2." - ARM Architecture Reference Manual ARMv7-A +// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 +// "Thumb instruction set support" +#[cfg(target_feature = "v7")] #[inline(always)] #[rustc_args_required_const(0)] pub unsafe fn __dbg(imm4: u32) { From 54223249e3da5ab856a13afd2564eff82d28c5b9 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 13 Feb 2019 17:44:54 +0100 Subject: [PATCH 06/31] acle/hints: use asm! for __nop addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r255250890 --- crates/core_arch/src/acle/hints.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/acle/hints.rs index 52d47cc9bf..6f9a2dfb21 100644 --- a/crates/core_arch/src/acle/hints.rs +++ b/crates/core_arch/src/acle/hints.rs @@ -114,7 +114,7 @@ pub unsafe fn __dbg(imm4: u32) { /// will increase execution time. 
#[inline(always)] pub unsafe fn __nop() { - hint(HINT_NOP); + asm!("NOP" : : : : "volatile") } extern "C" { From 5973346dc772cbdf96f459041bd0c7276279f33c Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 13 Feb 2019 18:29:48 +0100 Subject: [PATCH 07/31] acle/hints: most hints require 'v6' rather than 'v6k' addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r255251241 --- crates/core_arch/src/acle/hints.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/acle/hints.rs index 6f9a2dfb21..6f33fe3419 100644 --- a/crates/core_arch/src/acle/hints.rs +++ b/crates/core_arch/src/acle/hints.rs @@ -9,7 +9,7 @@ /// low-power state until one of a number of asynchronous events occurs. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __wfi() { hint(HINT_WFI); @@ -22,7 +22,7 @@ pub unsafe fn __wfi() { /// another processor. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __wfe() { hint(HINT_WFE); @@ -34,7 +34,7 @@ pub unsafe fn __wfe() { /// system. It is a NOP on a uniprocessor system. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M, 7-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __sev() { hint(HINT_SEV); @@ -59,7 +59,7 @@ pub unsafe fn __sevl() { /// improve overall system performance. 
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6k", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __yield() { hint(HINT_YIELD); From b3f0f28a4b7a577b1a2d8ac449f0611d0b68a4ce Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 13 Feb 2019 18:35:26 +0100 Subject: [PATCH 08/31] acle/simd32: also expose on the A profile addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r255253933 --- crates/core_arch/src/acle/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 843115a366..59197a0df9 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -86,17 +86,17 @@ mod dsp; pub use self::dsp::*; // Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says -// Section 5.4.9 of ACLE. +// Section 5.4.9 of ACLE. 
We'll expose these for the A profile even if deprecated #[cfg(any( - // v7-R - target_feature = "rclass", + // v7-A, v7-R + all(target_feature = "v6", not(target_feature = "mclass")), // v7E-M all(target_feature = "mclass", target_feature = "dsp") ))] mod simd32; #[cfg(any( - target_feature = "rclass", + all(target_feature = "v6", not(target_feature = "mclass")), all(target_feature = "mclass", target_feature = "dsp") ))] pub use self::simd32::*; From 25961be88f8be13a02399b0ed68bf7ba8b6066d2 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 13 Feb 2019 19:06:01 +0100 Subject: [PATCH 09/31] acle/dsp: make available on the A profile --- crates/core_arch/src/acle/mod.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 59197a0df9..4d7e45af0d 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -65,22 +65,18 @@ mod registers; pub use self::registers::*; // Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. 
QADD) -// But we also exclude the A profile because DSP is deprecated on that profile as of ACLE 2.0 (see +// We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see // section 5.4.7) #[cfg(any( // >= v5TE but excludes v7-A - all(target_feature = "v5te", not(target_feature = "mclass"), not(target_feature = "aclass")), + all(target_feature = "v5te", not(target_feature = "mclass")), // v7E-M all(target_feature = "mclass", target_feature = "dsp"), ))] mod dsp; #[cfg(any( - all( - target_feature = "v5te", - not(target_feature = "mclass"), - not(target_feature = "aclass") - ), + all(target_feature = "v5te", not(target_feature = "mclass")), all(target_feature = "mclass", target_feature = "dsp"), ))] pub use self::dsp::*; From 74ddada5126a95537c26d74f638bbd075da66b9c Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 13 Feb 2019 19:45:27 +0100 Subject: [PATCH 10/31] acle/barrier: use llvm.{arm,aarch64}.{dmb,dsb,isb} instead of asm! also make these available on architectures that don't have a dedicated DMB / DSB / ISB instruction addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r255312214 --- crates/core_arch/src/acle/barrier/common.rs | 2 +- crates/core_arch/src/acle/barrier/cp15.rs | 27 ++++++++ crates/core_arch/src/acle/barrier/mod.rs | 74 +++++++++++++++++++-- crates/core_arch/src/acle/mod.rs | 12 +--- 4 files changed, 100 insertions(+), 15 deletions(-) create mode 100644 crates/core_arch/src/acle/barrier/cp15.rs diff --git a/crates/core_arch/src/acle/barrier/common.rs b/crates/core_arch/src/acle/barrier/common.rs index a1d8c93e8c..0fb35534d1 100644 --- a/crates/core_arch/src/acle/barrier/common.rs +++ b/crates/core_arch/src/acle/barrier/common.rs @@ -9,6 +9,6 @@ dmb_dsb!(SY); impl super::super::sealed::Isb for SY { #[inline(always)] unsafe fn __isb(&self) { - asm!("ISB SY" : : : "memory" : "volatile") + super::isb(super::arg::SY) } } diff --git a/crates/core_arch/src/acle/barrier/cp15.rs 
b/crates/core_arch/src/acle/barrier/cp15.rs new file mode 100644 index 0000000000..7938acbbb4 --- /dev/null +++ b/crates/core_arch/src/acle/barrier/cp15.rs @@ -0,0 +1,27 @@ +// Reference: ARM11 MPCore Processor Technical Reference Manual (ARM DDI 0360E) Section 3.5 "Summary +// of CP15 instructions" + +/// Full system is the required shareability domain, reads and writes are the +/// required access types +pub struct SY; + +impl super::super::sealed::Dmb for SY { + #[inline(always)] + unsafe fn __dmb(&self) { + asm!("mcr p15, 0, r0, c7, c10, 5" : : : "memory" : "volatile") + } +} + +impl super::super::sealed::Dsb for SY { + #[inline(always)] + unsafe fn __dsb(&self) { + asm!("mcr p15, 0, r0, c7, c10, 4" : : : "memory" : "volatile") + } +} + +impl super::super::sealed::Isb for SY { + #[inline(always)] + unsafe fn __isb(&self) { + asm!("mcr p15, 0, r0, c7, c5, 4" : : : "memory" : "volatile") + } +} diff --git a/crates/core_arch/src/acle/barrier/mod.rs b/crates/core_arch/src/acle/barrier/mod.rs index 3fbf6899ff..61686895f0 100644 --- a/crates/core_arch/src/acle/barrier/mod.rs +++ b/crates/core_arch/src/acle/barrier/mod.rs @@ -1,31 +1,66 @@ // Reference: Section 7.4 "Hints" of ACLE +// CP15 instruction +#[cfg(not(any( + // v8 + target_arch = "aarch64", + // v7 + target_feature = "v7", + // v6-M + target_feature = "mclass" +)))] +mod cp15; + +#[cfg(not(any( + target_arch = "aarch64", + target_feature = "v7", + target_feature = "mclass" +)))] +pub use self::cp15::*; + +// Dedicated instructions macro_rules! 
dmb_dsb { ($A:ident) => { impl super::super::sealed::Dmb for $A { #[inline(always)] unsafe fn __dmb(&self) { - asm!(concat!("DMB ", stringify!($A)) : : : "memory" : "volatile") + super::dmb(super::arg::$A) } } impl super::super::sealed::Dsb for $A { #[inline(always)] unsafe fn __dsb(&self) { - asm!(concat!("DSB ", stringify!($A)) : : : "memory" : "volatile") + super::dsb(super::arg::$A) } } }; } +#[cfg(any( + target_arch = "aarch64", + target_feature = "v7", + target_feature = "mclass" +))] mod common; +#[cfg(any( + target_arch = "aarch64", + target_feature = "v7", + target_feature = "mclass" +))] pub use self::common::*; -#[cfg(not(target_feature = "mclass"))] +#[cfg(any( + target_arch = "aarch64", + target_feature = "v7", +))] mod not_mclass; -#[cfg(not(target_feature = "mclass"))] +#[cfg(any( + target_arch = "aarch64", + target_feature = "v7", +))] pub use self::not_mclass::*; #[cfg(target_arch = "aarch64")] @@ -87,3 +122,34 @@ where { arg.__isb() } + +extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dmb")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dmb")] + fn dmb(_: i32); + + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dsb")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dsb")] + fn dsb(_: i32); + + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.isb")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.isb")] + fn isb(_: i32); +} + +// we put these in a module to prevent weirdness with glob re-exports +mod arg { + // See Section 7.3 Memory barriers of ACLE + pub const SY: i32 = 15; + pub const ST: i32 = 14; + pub const LD: i32 = 13; + pub const ISH: i32 = 11; + pub const ISHST: i32 = 10; + pub const ISHLD: i32 = 9; + pub const NSH: i32 = 7; + pub const NSHST: i32 = 6; + pub const NSHLD: i32 = 5; + pub const OSH: i32 = 3; + pub const OSHST: i32 = 2; + pub const OSHLD: i32 = 1; +} diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 
4d7e45af0d..83aaacb5eb 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -37,16 +37,8 @@ //! //! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest) -// Supported arches: 8, 7, 6-M. See Section 10.1 of ACLE (e.g. DMB) -// But this is further refined within the module -#[cfg(any( - // v8 - target_arch = "aarch64", - // v7 - target_feature = "v7", - // v6-M - target_feature = "mclass" -))] +// 8, 7 and 6-M are supported via dedicated instructions like DMB. All other arches are supported +// via CP15 instructions. See Section 10.1 of ACLE mod barrier; #[cfg(any( From d91daffe239e2079881dbf44825d961b27215d33 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 13 Feb 2019 19:52:15 +0100 Subject: [PATCH 11/31] acle/{simd32,dsp}: not available on aarch64 addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r255312249 https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r255312264 --- crates/core_arch/src/acle/mod.rs | 44 ++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 83aaacb5eb..d173246511 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -59,33 +59,45 @@ pub use self::registers::*; // Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. 
QADD) // We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see // section 5.4.7) -#[cfg(any( - // >= v5TE but excludes v7-A - all(target_feature = "v5te", not(target_feature = "mclass")), - // v7E-M - all(target_feature = "mclass", target_feature = "dsp"), +#[cfg(all( + not(target_arch = "aarch64"), + any( + // >= v5TE but excludes v7-A + all(target_feature = "v5te", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), + ) ))] mod dsp; -#[cfg(any( - all(target_feature = "v5te", not(target_feature = "mclass")), - all(target_feature = "mclass", target_feature = "dsp"), +#[cfg(all( + not(target_arch = "aarch64"), + any( + all(target_feature = "v5te", not(target_feature = "mclass")), + all(target_feature = "mclass", target_feature = "dsp"), + ) ))] pub use self::dsp::*; // Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says // Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated -#[cfg(any( - // v7-A, v7-R - all(target_feature = "v6", not(target_feature = "mclass")), - // v7E-M - all(target_feature = "mclass", target_feature = "dsp") +#[cfg(all( + not(target_arch = "aarch64"), + any( + // v7-A, v7-R + all(target_feature = "v6", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp") + ) ))] mod simd32; -#[cfg(any( - all(target_feature = "v6", not(target_feature = "mclass")), - all(target_feature = "mclass", target_feature = "dsp") +#[cfg(all( + not(target_arch = "aarch64"), + any( + all(target_feature = "v6", not(target_feature = "mclass")), + all(target_feature = "mclass", target_feature = "dsp") + ) ))] pub use self::simd32::*; From 0d8151926c208e6f0ac4505607efb28b8b633607 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Wed, 13 Feb 2019 20:05:32 +0100 Subject: [PATCH 12/31] acle: move saturating intrinsics into its own module addresses 
https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r255312560 --- crates/core_arch/src/acle/dsp.rs | 3 --- crates/core_arch/src/acle/mod.rs | 13 +++++++++++++ crates/core_arch/src/acle/sat.rs | 8 ++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) create mode 100644 crates/core_arch/src/acle/sat.rs diff --git a/crates/core_arch/src/acle/dsp.rs b/crates/core_arch/src/acle/dsp.rs index 4029e7aaa3..31817ea870 100644 --- a/crates/core_arch/src/acle/dsp.rs +++ b/crates/core_arch/src/acle/dsp.rs @@ -1,7 +1,6 @@ //! # References: //! //! - Section 8.3 "16-bit multiplications" -//! - Section 8.4 "Saturating intrinsics" //! //! Intrinsics that could live here: //! @@ -11,8 +10,6 @@ //! - __smultt //! - __smulwb //! - __smulwt -//! - __ssat -//! - __usat //! - __qadd //! - __qsub //! - __qdbl diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index d173246511..f1be11b27b 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -79,6 +79,19 @@ mod dsp; ))] pub use self::dsp::*; +// Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT) +#[cfg(all( + not(target_arch = "aarch64"), + target_feature = "v6", +))] +mod sat; + +#[cfg(all( + not(target_arch = "aarch64"), + target_feature = "v6", +))] +pub use self::sat::*; + // Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says // Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated #[cfg(all( diff --git a/crates/core_arch/src/acle/sat.rs b/crates/core_arch/src/acle/sat.rs new file mode 100644 index 0000000000..38c98d7342 --- /dev/null +++ b/crates/core_arch/src/acle/sat.rs @@ -0,0 +1,8 @@ +//! # References: +//! +//! - Section 8.4 "Saturating intrinsics" +//! +//! Intrinsics that could live here: +//! +//! - __ssat +//! 
- __usat From 50b51260cd8dcb93e3b2dd07ca8afffce4669637 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 14 Feb 2019 12:29:46 +0100 Subject: [PATCH 13/31] acle/hints: gate sevl on 'v8' rather than on 'aarch64' addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r256553546 --- crates/core_arch/src/acle/hints.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/acle/hints.rs index 6f33fe3419..2288eb3c9e 100644 --- a/crates/core_arch/src/acle/hints.rs +++ b/crates/core_arch/src/acle/hints.rs @@ -46,7 +46,7 @@ pub unsafe fn __sev() { /// instruction. In a multiprocessor system, it is not required to affect the /// other processors. // LLVM says "instruction requires: armv8" -#[cfg(target_arch = "aarch64")] +#[cfg(target_feature = "v8")] #[inline(always)] pub unsafe fn __sevl() { hint(HINT_SEVL); From b14287e42a4c263bcda2050747f9a2110aa24f63 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 14 Feb 2019 12:37:11 +0100 Subject: [PATCH 14/31] acle/barrier: remove cfg from re-export addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r256556043 --- crates/core_arch/src/acle/mod.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index f1be11b27b..a9b4dbfd72 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -41,11 +41,6 @@ // via CP15 instructions. 
See Section 10.1 of ACLE mod barrier; -#[cfg(any( - target_arch = "aarch64", - target_feature = "v7", - target_feature = "mclass" -))] pub use self::barrier::*; mod hints; From 81ad6208986c73321852e7db2c667506f98218c7 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 14 Feb 2019 12:38:27 +0100 Subject: [PATCH 15/31] acle/dsp: update comment addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r256556341 --- crates/core_arch/src/acle/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index a9b4dbfd72..7d281538f5 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -57,7 +57,7 @@ pub use self::registers::*; #[cfg(all( not(target_arch = "aarch64"), any( - // >= v5TE but excludes v7-A + // >= v5TE but excludes v7-M all(target_feature = "v5te", not(target_feature = "mclass")), // v7E-M all(target_feature = "mclass", target_feature = "dsp"), From fa0deb38ffe32b0e15e3fba3fe526dc806190d24 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 14 Feb 2019 12:49:36 +0100 Subject: [PATCH 16/31] acle/dsp: note the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r256597576 --- crates/core_arch/src/acle/mod.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 7d281538f5..a650173618 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -33,6 +33,14 @@ //! - `v6 < v8m < v6t2` //! - `v7 < v8m.main` //! +//! *NOTE*: Section 5.4.7 of ACLE says: +//! +//! - "__ARM_FEATURE_DSP is defined to 1 if the DSP (v5E) instructions are supported and the +//! intrinsics defined in Saturating intrinsics are available." +//! +//! This does *not* match how LLVM uses the '+dsp' feature; this feature is not set for v5te +//! 
targets so we have to work around this difference. +//! //! # References //! //! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest) @@ -54,6 +62,8 @@ pub use self::registers::*; // Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD) // We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see // section 5.4.7) +// Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on +// '+v5te' rather than on '+dsp' #[cfg(all( not(target_arch = "aarch64"), any( From b3822ef729750c96d8bba1b4003e3c65335e3027 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 14 Feb 2019 15:38:43 +0100 Subject: [PATCH 17/31] acle: add ldrex, clrex and strex --- crates/core_arch/src/acle/ex.rs | 115 +++++++++++++++++++++++++++++++ crates/core_arch/src/acle/mod.rs | 4 ++ 2 files changed, 119 insertions(+) create mode 100644 crates/core_arch/src/acle/ex.rs diff --git a/crates/core_arch/src/acle/ex.rs b/crates/core_arch/src/acle/ex.rs new file mode 100644 index 0000000000..c25e0dc37a --- /dev/null +++ b/crates/core_arch/src/acle/ex.rs @@ -0,0 +1,115 @@ +// Reference: Section 5.4.4 "LDREX / STREX" of ACLE + +/// Removes the exclusive lock created by LDREX +// Supported: v6, v6K, v7-M, v7-A, v7-R +// Not supported: v5, v6-M +#[cfg(any( + all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M + all(target_feature = "v7", target_feature = "mclass"), // v7-M +))] +pub unsafe fn __clrex() { + extern "C" { + #[link_name = "llvm.arm.clrex"] + fn clrex(); + } + + clrex() +} + +/// Executes a exclusive LDR instruction for 8 bit value. 
+// Supported: v6K, v7-M, v7-A, v7-R +// Not supported: v5, v6, v6-M +#[cfg( + target_feature = "v6k", // includes v7-M but excludes v6-M +)] +pub unsafe fn __ldrexb(p: *const u8) -> u8 { + extern "C" { + #[link_name = "llvm.arm.ldrex.p0i8"] + fn ldrex8(p: *const u8) -> u32; + } + + ldrex8(p) as u8 +} + +/// Executes a exclusive LDR instruction for 16 bit value. +// Supported: v6K, v7-M, v7-A, v7-R, v8 +// Not supported: v5, v6, v6-M +#[cfg( + target_feature = "v6k", // includes v7-M but excludes v6-M +)] +pub unsafe fn __ldrexh(p: *const u16) -> u16 { + extern "C" { + #[link_name = "llvm.arm.ldrex.p0i16"] + fn ldrex16(p: *const u16) -> u32; + } + + ldrex16(p) as u16 +} + +/// Executes a exclusive LDR instruction for 32 bit value. +// Supported: v6, v7-M, v6K, v7-A, v7-R, v8 +// Not supported: v5, v6-M +#[cfg(any( + all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M + all(target_feature = "v7", target_feature = "mclass"), // v7-M +))] +pub unsafe fn __ldrex(p: *const u32) -> u32 { + extern "C" { + #[link_name = "llvm.arm.ldrex.p0i32"] + fn ldrex32(p: *const u32) -> u32; + } + + ldrex32(p) +} + +/// Executes a exclusive STR instruction for 8 bit values +/// +/// Returns `0` if the operation succeeded, or `1` if it failed +// supported: v6K, v7-M, v7-A, v7-R +// Not supported: v5, v6, v6-M +#[cfg( + target_feature = "v6k", // includes v7-M but excludes v6-M +)] +pub unsafe fn __strexb(value: u32, addr: *const u8) -> u32 { + extern "C" { + #[link_name = "llvm.arm.strex.p0i8"] + fn strex8(value: u32, addr: *const u8) -> u32; + } + + strex8(value, addr) +} + +/// Executes a exclusive STR instruction for 16 bit values +/// +/// Returns `0` if the operation succeeded, or `1` if it failed +// Supported: v6K, v7-M, v7-A, v7-R, v8 +// Not supported: v5, v6, v6-M +#[cfg( + target_feature = "v6k", // includes v7-M but excludes v6-M +)] +pub unsafe fn __strexh(value: u16, addr: *const u16) -> u32 { + extern "C" { + #[link_name = "llvm.arm.strex.p0i16"] 
+ fn strex16(value: u32, addr: *const u16) -> u32; + } + + strex16(value as u32, addr) +} + +/// Executes a exclusive STR instruction for 32 bit values +/// +/// Returns `0` if the operation succeeded, or `1` if it failed +// Supported: v6, v7-M, v6K, v7-A, v7-R, v8 +// Not supported: v5, v6-M +#[cfg(any( + all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M + all(target_feature = "v7", target_feature = "mclass"), // v7-M +))] +pub unsafe fn __strex(value: u32, addr: *const u32) -> u32 { + extern "C" { + #[link_name = "llvm.arm.strex.p0i32"] + fn strex32(value: u32, addr: *const u32) -> u32; + } + + strex32(value, addr) +} diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index a650173618..068edc8aa2 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -59,6 +59,10 @@ mod registers; pub use self::registers::*; +mod ex; + +pub use self::ex::*; + // Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD) // We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see // section 5.4.7) From e67cf235c5404b6bb9ae98e2a21dd7be6466f582 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 14 Feb 2019 15:51:33 +0100 Subject: [PATCH 18/31] acle/docs: add armv8-m and armv8-r to the list of rustc targets & llvm features --- crates/core_arch/src/acle/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 068edc8aa2..1354fe9fae 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -13,6 +13,8 @@ //! - `armv7r-none-eabi` - **ARMv7-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +rclass` //! - `thumbv7m-none-eabi` - **ARMv7-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass` //! - `thumbv7em-none-eabi` - **ARMv7E-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +thumb-mode +mclass` +//! 
- `thumbv8m.main-none-eabi` - **ARMv8-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass` +//! - `armv8r-none-eabi` - **ARMv8-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +v8 +thumb2 +rclass` //! - `aarch64-unknown-linux-gnu` - **ARMv8-A (AArch64)** - `+fp +neon` //! //! Section 10.1 of ACLE says: From 53f320e98120dff5b16e05e0bf00c31e31549c07 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 14 Feb 2019 15:54:08 +0100 Subject: [PATCH 19/31] acle/hints: make sevl truly available on aarch64 addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r256864336 --- crates/core_arch/src/acle/hints.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/acle/hints.rs index 2288eb3c9e..20faed69cb 100644 --- a/crates/core_arch/src/acle/hints.rs +++ b/crates/core_arch/src/acle/hints.rs @@ -46,7 +46,10 @@ pub unsafe fn __sev() { /// instruction. In a multiprocessor system, it is not required to affect the /// other processors. 
// LLVM says "instruction requires: armv8" -#[cfg(target_feature = "v8")] +#[cfg(any( + target_feature = "v8", // 32-bit ARMv8 + target_arch = "aarch64", // AArch64 +))] #[inline(always)] pub unsafe fn __sevl() { hint(HINT_SEVL); From cbfd8d0e3e36e8b741fa74dd5ff6af9d6c5a0855 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 14 Feb 2019 17:08:57 +0100 Subject: [PATCH 20/31] acle: move arm/dsp into acle/{dsp,simd32} addresses https://github.com/rust-lang-nursery/stdsimd/pull/557#discussion_r255312454 --- crates/core_arch/src/acle/dsp.rs | 57 ++- crates/core_arch/src/acle/simd32.rs | 732 ++++++++++++++++++++++++++-- crates/core_arch/src/arm/dsp.rs | 654 ------------------------- crates/core_arch/src/arm/mod.rs | 6 - 4 files changed, 720 insertions(+), 729 deletions(-) delete mode 100644 crates/core_arch/src/arm/dsp.rs diff --git a/crates/core_arch/src/acle/dsp.rs b/crates/core_arch/src/acle/dsp.rs index 31817ea870..3a71f2c469 100644 --- a/crates/core_arch/src/acle/dsp.rs +++ b/crates/core_arch/src/acle/dsp.rs @@ -4,18 +4,45 @@ //! //! Intrinsics that could live here: //! -//! - __smulbb -//! - __smulbt -//! - __smultb -//! - __smultt -//! - __smulwb -//! - __smulwt -//! - __qadd -//! - __qsub -//! - __qdbl -//! - __smlabb -//! - __smlabt -//! - __smlatb -//! - __smlatt -//! - __smlawb -//! - __smlawt +//! - [ ] __smulbb +//! - [ ] __smulbt +//! - [ ] __smultb +//! - [ ] __smultt +//! - [ ] __smulwb +//! - [ ] __smulwt +//! - [x] __qadd +//! - [x] __qsub +//! - [ ] __qdbl +//! - [ ] __smlabb +//! - [ ] __smlabt +//! - [ ] __smlatb +//! - [ ] __smlatt +//! - [ ] __smlawb +//! - [ ] __smlawt + +extern "C" { + #[link_name = "llvm.arm.qadd"] + fn arm_qadd(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.qsub"] + fn arm_qsub(a: i32, b: i32) -> i32; + +} + +/// Signed saturating addition +/// +/// Returns the 32-bit saturating signed equivalent of a + b. 
+#[inline] +#[cfg_attr(test, assert_instr(qadd))] +pub unsafe fn qadd(a: i32, b: i32) -> i32 { + arm_qadd(a, b) +} + +/// Signed saturating subtraction +/// +/// Returns the 32-bit saturating signed equivalent of a - b. +#[inline] +#[cfg_attr(test, assert_instr(qsub))] +pub unsafe fn qsub(a: i32, b: i32) -> i32 { + arm_qsub(a, b) +} diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/acle/simd32.rs index 6b28ec88dc..420ce2b2d0 100644 --- a/crates/core_arch/src/acle/simd32.rs +++ b/crates/core_arch/src/acle/simd32.rs @@ -4,57 +4,681 @@ //! //! Intrinsics that could live here //! -//! - __ssat16 -//! - __usat16 -//! - __sxtab16 -//! - __sxtb16 -//! - __uxtab16 -//! - __uxtb16 -//! - __qsub8 -//! - __sadd8 -//! - __shadd8 -//! - __shsub8 -//! - __ssub8 -//! - __uadd8 -//! - __uhadd8 -//! - __uhsub8 -//! - __uqadd8 -//! - __uqsub8 -//! - __usub8 -//! - __usad8 -//! - __usada8 -//! - __qadd16 -//! - __qasx -//! - __qsub16 -//! - __sadd16 -//! - __sasx -//! - __shadd16 -//! - __shasx -//! - __shsax -//! - __shsub16 -//! - __ssax -//! - __ssub16 -//! - __uadd16 -//! - __uasx -//! - __uhadd16 -//! - __uhasx -//! - __uhsax -//! - __uhsub16 -//! - __uqadd16 -//! - __uqasx -//! - __uqsax -//! - __uqsub16 -//! - __usax -//! - __usub16 -//! - __smlad -//! - __smladx -//! - __smlald -//! - __smlaldx -//! - __smlsd -//! - __smlsdx -//! - __smlsld -//! - __smlsldx -//! - __smuad -//! - __smuadx -//! - __smusd -//! - __smusdx +//! - [x] __sel +//! - [ ] __ssat16 +//! - [ ] __usat16 +//! - [ ] __sxtab16 +//! - [ ] __sxtb16 +//! - [ ] __uxtab16 +//! - [ ] __uxtb16 +//! - [x] __qadd8 +//! - [x] __qsub8 +//! - [x] __sadd8 +//! - [x] __shadd8 +//! - [x] __shsub8 +//! - [ ] __ssub8 +//! - [ ] __uadd8 +//! - [ ] __uhadd8 +//! - [ ] __uhsub8 +//! - [ ] __uqadd8 +//! - [ ] __uqsub8 +//! - [ ] __usub8 +//! - [x] __usad8 +//! - [x] __usada8 +//! - [x] __qadd16 +//! - [x] __qasx +//! - [x] __qsax +//! - [x] __qsub16 +//! - [x] __sadd16 +//! - [x] __sasx +//! 
- [x] __shadd16 +//! - [ ] __shasx +//! - [ ] __shsax +//! - [x] __shsub16 +//! - [ ] __ssax +//! - [ ] __ssub16 +//! - [ ] __uadd16 +//! - [ ] __uasx +//! - [ ] __uhadd16 +//! - [ ] __uhasx +//! - [ ] __uhsax +//! - [ ] __uhsub16 +//! - [ ] __uqadd16 +//! - [ ] __uqasx +//! - [ ] __uqsax +//! - [ ] __uqsub16 +//! - [ ] __usax +//! - [ ] __usub16 +//! - [x] __smlad +//! - [ ] __smladx +//! - [ ] __smlald +//! - [ ] __smlaldx +//! - [x] __smlsd +//! - [ ] __smlsdx +//! - [ ] __smlsld +//! - [ ] __smlsldx +//! - [x] __smuad +//! - [x] __smuadx +//! - [x] __smusd +//! - [x] __smusdx + +types! { + /// ARM-specific 32-bit wide vector of four packed `i8`. + pub struct int8x4_t(i8, i8, i8, i8); + /// ARM-specific 32-bit wide vector of four packed `u8`. + pub struct uint8x4_t(u8, u8, u8, u8); + /// ARM-specific 32-bit wide vector of two packed `i16`. + pub struct int16x2_t(i16, i16); + /// ARM-specific 32-bit wide vector of two packed `u16`. + pub struct uint16x2_t(u16, u16); +} + +macro_rules!
dsp_call { + ($name:expr, $a:expr, $b:expr) => { + ::mem::transmute($name(::mem::transmute($a), ::mem::transmute($b))) + }; +} + +extern "C" { + #[link_name = "llvm.arm.qadd8"] + fn arm_qadd8(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.qsub8"] + fn arm_qsub8(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.qsub16"] + fn arm_qsub16(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.qadd16"] + fn arm_qadd16(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.qasx"] + fn arm_qasx(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.qsax"] + fn arm_qsax(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.sadd16"] + fn arm_sadd16(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.sadd8"] + fn arm_sadd8(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.smlad"] + fn arm_smlad(a: i32, b: i32, c: i32) -> i32; + + #[link_name = "llvm.arm.smlsd"] + fn arm_smlsd(a: i32, b: i32, c: i32) -> i32; + + #[link_name = "llvm.arm.sasx"] + fn arm_sasx(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.sel"] + fn arm_sel(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.shadd8"] + fn arm_shadd8(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.shadd16"] + fn arm_shadd16(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.shsub8"] + fn arm_shsub8(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.shsub16"] + fn arm_shsub16(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.smuad"] + fn arm_smuad(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.smuadx"] + fn arm_smuadx(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.smusd"] + fn arm_smusd(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.smusdx"] + fn arm_smusdx(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.usad8"] + fn arm_usad8(a: i32, b: i32) -> u32; +} + +/// Saturating four 8-bit integer additions +/// +/// Returns the 8-bit signed equivalent of +/// +/// res\[0\] = a\[0\] + b\[0\] +/// res\[1\] = a\[1\] + b\[1\] +/// res\[2\] = a\[2\] + b\[2\] +/// res\[3\] = a\[3\] + b\[3\] +#[inline] 
+#[cfg_attr(test, assert_instr(qadd8))] +pub unsafe fn qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { + dsp_call!(arm_qadd8, a, b) +} + +/// Saturating four 8-bit integer subtractions +/// +/// Returns the 8-bit signed equivalent of +/// +/// res\[0\] = a\[0\] - b\[0\] +/// res\[1\] = a\[1\] - b\[1\] +/// res\[2\] = a\[2\] - b\[2\] +/// res\[3\] = a\[3\] - b\[3\] +#[inline] +#[cfg_attr(test, assert_instr(qsub8))] +pub unsafe fn qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { + dsp_call!(arm_qsub8, a, b) +} + +/// Saturating two 16-bit integer subtractions +/// +/// Returns the 16-bit signed equivalent of +/// +/// res\[0\] = a\[0\] - b\[0\] +/// res\[1\] = a\[1\] - b\[1\] +#[inline] +#[cfg_attr(test, assert_instr(qsub16))] +pub unsafe fn qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { + dsp_call!(arm_qsub16, a, b) +} + +/// Saturating two 16-bit integer additions +/// +/// Returns the 16-bit signed equivalent of +/// +/// res\[0\] = a\[0\] + b\[0\] +/// res\[1\] = a\[1\] + b\[1\] +#[inline] +#[cfg_attr(test, assert_instr(qadd16))] +pub unsafe fn qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { + dsp_call!(arm_qadd16, a, b) +} + +/// Returns the 16-bit signed saturated equivalent of +/// +/// res\[0\] = a\[0\] - b\[1\] +/// res\[1\] = a\[1\] + b\[0\] +#[inline] +#[cfg_attr(test, assert_instr(qasx))] +pub unsafe fn qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { + dsp_call!(arm_qasx, a, b) +} + +/// Returns the 16-bit signed saturated equivalent of +/// +/// res\[0\] = a\[0\] + b\[1\] +/// res\[1\] = a\[1\] - b\[0\] +#[inline] +#[cfg_attr(test, assert_instr(qsax))] +pub unsafe fn qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t { + dsp_call!(arm_qsax, a, b) +} + +/// Returns the 16-bit signed equivalent of +/// +/// res\[0\] = a\[0\] + b\[0\] +/// res\[1\] = a\[1\] + b\[1\] +/// +/// and the GE bits of the APSR are set.
+#[inline] +#[cfg_attr(test, assert_instr(sadd16))] +pub unsafe fn sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { + dsp_call!(arm_sadd16, a, b) +} + +/// Returns the 8-bit signed equivalent of +/// +/// res\[0\] = a\[0\] + b\[0\] +/// res\[1\] = a\[1\] + b\[1\] +/// res\[2\] = a\[2\] + b\[2\] +/// res\[3\] = a\[3\] + b\[3\] +/// +/// and the GE bits of the APSR are set. +#[inline] +#[cfg_attr(test, assert_instr(sadd8))] +pub unsafe fn sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { + dsp_call!(arm_sadd8, a, b) +} + +/// Dual 16-bit Signed Multiply with Addition of products +/// and 32-bit accumulation. +/// +/// Returns the 16-bit signed equivalent of +/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] + c +#[inline] +#[cfg_attr(test, assert_instr(smlad))] +pub unsafe fn smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { + arm_smlad(::mem::transmute(a), ::mem::transmute(b), c) +} + +/// Dual 16-bit Signed Multiply with Subtraction of products +/// and 32-bit accumulation and overflow detection. +/// +/// Returns the 16-bit signed equivalent of +/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] + c +#[inline] +#[cfg_attr(test, assert_instr(smlsd))] +pub unsafe fn smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { + arm_smlsd(::mem::transmute(a), ::mem::transmute(b), c) +} + +/// Returns the 16-bit signed equivalent of +/// +/// res\[0\] = a\[0\] - b\[1\] +/// res\[1\] = a\[1\] + b\[0\] +/// +/// and the GE bits of the APSR are set. +#[inline] +#[cfg_attr(test, assert_instr(sasx))] +pub unsafe fn sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { + dsp_call!(arm_sasx, a, b) +} + +/// Select bytes from each operand according to APSR GE flags +/// +/// Returns the equivalent of +/// +/// res\[0\] = GE\[0\] ? a\[0\] : b\[0\] +/// res\[1\] = GE\[1\] ? a\[1\] : b\[1\] +/// res\[2\] = GE\[2\] ? a\[2\] : b\[2\] +/// res\[3\] = GE\[3\] ?
a\[3\] : b\[3\] +/// +/// where GE are bits of APSR +#[inline] +#[cfg_attr(test, assert_instr(sel))] +pub unsafe fn sel(a: int8x4_t, b: int8x4_t) -> int8x4_t { + dsp_call!(arm_sel, a, b) +} + +/// Signed halving parallel byte-wise addition. +/// +/// Returns the 8-bit signed equivalent of +/// +/// res\[0\] = (a\[0\] + b\[0\]) / 2 +/// res\[1\] = (a\[1\] + b\[1\]) / 2 +/// res\[2\] = (a\[2\] + b\[2\]) / 2 +/// res\[3\] = (a\[3\] + b\[3\]) / 2 +#[inline] +#[cfg_attr(test, assert_instr(shadd8))] +pub unsafe fn shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { + dsp_call!(arm_shadd8, a, b) +} + +/// Signed halving parallel halfword-wise addition. +/// +/// Returns the 16-bit signed equivalent of +/// +/// res\[0\] = (a\[0\] + b\[0\]) / 2 +/// res\[1\] = (a\[1\] + b\[1\]) / 2 +#[inline] +#[cfg_attr(test, assert_instr(shadd16))] +pub unsafe fn shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { + dsp_call!(arm_shadd16, a, b) +} + +/// Signed halving parallel byte-wise subtraction. +/// +/// Returns the 8-bit signed equivalent of +/// +/// res\[0\] = (a\[0\] - b\[0\]) / 2 +/// res\[1\] = (a\[1\] - b\[1\]) / 2 +/// res\[2\] = (a\[2\] - b\[2\]) / 2 +/// res\[3\] = (a\[3\] - b\[3\]) / 2 +#[inline] +#[cfg_attr(test, assert_instr(shsub8))] +pub unsafe fn shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { + dsp_call!(arm_shsub8, a, b) +} + +/// Signed halving parallel halfword-wise subtraction. +/// +/// Returns the 16-bit signed equivalent of +/// +/// res\[0\] = (a\[0\] - b\[0\]) / 2 +/// res\[1\] = (a\[1\] - b\[1\]) / 2 +#[inline] +#[cfg_attr(test, assert_instr(shsub16))] +pub unsafe fn shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { + dsp_call!(arm_shsub16, a, b) +} + +/// Signed Dual Multiply Add. +/// +/// Returns the equivalent of +/// +/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] +/// +/// and sets the Q flag if overflow occurs on the addition. 
+#[inline] +#[cfg_attr(test, assert_instr(smuad))] +pub unsafe fn smuad(a: int16x2_t, b: int16x2_t) -> i32 { + arm_smuad(::mem::transmute(a), ::mem::transmute(b)) +} + +/// Signed Dual Multiply Add Reversed. +/// +/// Returns the equivalent of +/// +/// res = a\[0\] * b\[1\] + a\[1\] * b\[0\] +/// +/// and sets the Q flag if overflow occurs on the addition. +#[inline] +#[cfg_attr(test, assert_instr(smuadx))] +pub unsafe fn smuadx(a: int16x2_t, b: int16x2_t) -> i32 { + arm_smuadx(::mem::transmute(a), ::mem::transmute(b)) +} + +/// Signed Dual Multiply Subtract. +/// +/// Returns the equivalent of +/// +/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] +/// +/// The result cannot overflow, so the Q flag is not affected. +#[inline] +#[cfg_attr(test, assert_instr(smusd))] +pub unsafe fn smusd(a: int16x2_t, b: int16x2_t) -> i32 { + arm_smusd(::mem::transmute(a), ::mem::transmute(b)) +} + +/// Signed Dual Multiply Subtract Reversed. +/// +/// Returns the equivalent of +/// +/// res = a\[0\] * b\[1\] - a\[1\] * b\[0\] +/// +/// The result cannot overflow, so the Q flag is not affected. +#[inline] +#[cfg_attr(test, assert_instr(smusdx))] +pub unsafe fn smusdx(a: int16x2_t, b: int16x2_t) -> i32 { + arm_smusdx(::mem::transmute(a), ::mem::transmute(b)) +} + +/// Sum of 8-bit absolute differences. +/// +/// Returns the 8-bit unsigned equivalent of +/// +/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\ +/// abs(a\[2\] - b\[2\]) + abs(a\[3\] - b\[3\]) +#[inline] +#[cfg_attr(test, assert_instr(usad8))] +pub unsafe fn usad8(a: int8x4_t, b: int8x4_t) -> u32 { + arm_usad8(::mem::transmute(a), ::mem::transmute(b)) +} + +/// Sum of 8-bit absolute differences and constant.
+/// +/// Returns the 8-bit unsigned equivalent of +/// +/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\ +/// abs(a\[2\] - b\[2\]) + abs(a\[3\] - b\[3\]) + c +#[inline] +#[cfg_attr(test, assert_instr(usad8))] +pub unsafe fn usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { + usad8(a, b) + c +} + +#[cfg(test)] +mod tests { + use core_arch::arm::*; + use core_arch::simd::*; + use std::mem; + use stdsimd_test::simd_test; + + #[test] + fn qadd() { + unsafe { + assert_eq!(dsp::qadd(-10, 60), 50); + assert_eq!(dsp::qadd(::std::i32::MAX, 10), ::std::i32::MAX); + assert_eq!(dsp::qadd(::std::i32::MIN, -10), ::std::i32::MIN); + } + } + + #[test] + fn qsub() { + unsafe { + assert_eq!(dsp::qsub(10, 60), -50); + assert_eq!(dsp::qsub(::std::i32::MAX, -10), ::std::i32::MAX); + assert_eq!(dsp::qsub(::std::i32::MIN, 10), ::std::i32::MIN); + } + } + + #[test] + fn qadd8() { + unsafe { + let a = i8x4::new(1, 2, 3, ::std::i8::MAX); + let b = i8x4::new(2, -1, 0, 1); + let c = i8x4::new(3, 1, 3, ::std::i8::MAX); + let r: i8x4 = dsp_call!(dsp::qadd8, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn qsub8() { + unsafe { + let a = i8x4::new(1, 2, 3, ::std::i8::MIN); + let b = i8x4::new(2, -1, 0, 1); + let c = i8x4::new(-1, 3, 3, ::std::i8::MIN); + let r: i8x4 = dsp_call!(dsp::qsub8, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn qadd16() { + unsafe { + let a = i16x2::new(1, 2); + let b = i16x2::new(2, -1); + let c = i16x2::new(3, 1); + let r: i16x2 = dsp_call!(dsp::qadd16, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn qsub16() { + unsafe { + let a = i16x2::new(10, 20); + let b = i16x2::new(20, -10); + let c = i16x2::new(-10, 30); + let r: i16x2 = dsp_call!(dsp::qsub16, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn qasx() { + unsafe { + let a = i16x2::new(1, ::std::i16::MAX); + let b = i16x2::new(2, 2); + let c = i16x2::new(-1, ::std::i16::MAX); + let r: i16x2 = dsp_call!(dsp::qasx, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn qsax() { + unsafe { + let a =
i16x2::new(1, ::std::i16::MAX); + let b = i16x2::new(2, 2); + let c = i16x2::new(3, ::std::i16::MAX - 2); + let r: i16x2 = dsp_call!(dsp::qsax, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn sadd16() { + unsafe { + let a = i16x2::new(1, ::std::i16::MAX); + let b = i16x2::new(2, 2); + let c = i16x2::new(3, -::std::i16::MAX); + let r: i16x2 = dsp_call!(dsp::sadd16, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn sadd8() { + unsafe { + let a = i8x4::new(1, 2, 3, ::std::i8::MAX); + let b = i8x4::new(4, 3, 2, 2); + let c = i8x4::new(5, 5, 5, -::std::i8::MAX); + let r: i8x4 = dsp_call!(dsp::sadd8, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn sasx() { + unsafe { + let a = i16x2::new(1, 2); + let b = i16x2::new(2, 1); + let c = i16x2::new(0, 4); + let r: i16x2 = dsp_call!(dsp::sasx, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn smlad() { + unsafe { + let a = i16x2::new(1, 2); + let b = i16x2::new(3, 4); + let r = dsp::smlad(::mem::transmute(a), ::mem::transmute(b), 10); + assert_eq!(r, (1 * 3) + (2 * 4) + 10); + } + } + + #[test] + fn smlsd() { + unsafe { + let a = i16x2::new(1, 2); + let b = i16x2::new(3, 4); + let r = dsp::smlsd(::mem::transmute(a), ::mem::transmute(b), 10); + assert_eq!(r, ((1 * 3) - (2 * 4)) + 10); + } + } + + #[test] + fn sel() { + unsafe { + let a = i8x4::new(1, 2, 3, ::std::i8::MAX); + let b = i8x4::new(4, 3, 2, 2); + // call sadd8() to set GE bits + dsp::sadd8(::mem::transmute(a), ::mem::transmute(b)); + let c = i8x4::new(1, 2, 3, ::std::i8::MAX); + let r: i8x4 = dsp_call!(dsp::sel, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn shadd8() { + unsafe { + let a = i8x4::new(1, 2, 3, 4); + let b = i8x4::new(5, 4, 3, 2); + let c = i8x4::new(3, 3, 3, 3); + let r: i8x4 = dsp_call!(dsp::shadd8, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn shadd16() { + unsafe { + let a = i16x2::new(1, 2); + let b = i16x2::new(5, 4); + let c = i16x2::new(3, 3); + let r: i16x2 = dsp_call!(dsp::shadd16, a, b); + assert_eq!(r, c); + } + } + + 
#[test] + fn shsub8() { + unsafe { + let a = i8x4::new(1, 2, 3, 4); + let b = i8x4::new(5, 4, 3, 2); + let c = i8x4::new(-2, -1, 0, 1); + let r: i8x4 = dsp_call!(dsp::shsub8, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn shsub16() { + unsafe { + let a = i16x2::new(1, 2); + let b = i16x2::new(5, 4); + let c = i16x2::new(-2, -1); + let r: i16x2 = dsp_call!(dsp::shsub16, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn smuad() { + unsafe { + let a = i16x2::new(1, 2); + let b = i16x2::new(5, 4); + let r = dsp::smuad(::mem::transmute(a), ::mem::transmute(b)); + assert_eq!(r, 13); + } + } + + #[test] + fn smuadx() { + unsafe { + let a = i16x2::new(1, 2); + let b = i16x2::new(5, 4); + let r = dsp::smuadx(::mem::transmute(a), ::mem::transmute(b)); + assert_eq!(r, 14); + } + } + + #[test] + fn smusd() { + unsafe { + let a = i16x2::new(1, 2); + let b = i16x2::new(5, 4); + let r = dsp::smusd(::mem::transmute(a), ::mem::transmute(b)); + assert_eq!(r, -3); + } + } + + #[test] + fn smusdx() { + unsafe { + let a = i16x2::new(1, 2); + let b = i16x2::new(5, 4); + let r = dsp::smusdx(::mem::transmute(a), ::mem::transmute(b)); + assert_eq!(r, -6); + } + } + + #[test] + fn usad8() { + unsafe { + let a = i8x4::new(1, 2, 3, 4); + let b = i8x4::new(4, 3, 2, 1); + let r = dsp::usad8(::mem::transmute(a), ::mem::transmute(b)); + assert_eq!(r, 8); + } + } + + #[test] + fn usad8a() { + unsafe { + let a = i8x4::new(1, 2, 3, 4); + let b = i8x4::new(4, 3, 2, 1); + let c = 10; + let r = dsp::usad8a(::mem::transmute(a), ::mem::transmute(b), c); + assert_eq!(r, 8 + c); + } + } +} diff --git a/crates/core_arch/src/arm/dsp.rs b/crates/core_arch/src/arm/dsp.rs deleted file mode 100644 index 8385e7ed21..0000000000 --- a/crates/core_arch/src/arm/dsp.rs +++ /dev/null @@ -1,654 +0,0 @@ -//! ARM DSP Intrinsics. -//! -//! Based on "Arm C Language Extensions (ACLE) Version Q2 2018" -//! -//! 
https://developer.arm.com/products/software-development-tools/compilers/arm-compiler-5/docs/101028/0006 - -#[cfg(test)] -use stdsimd_test::assert_instr; - -types! { - /// ARM-specific 32-bit wide vector of four packed `i8`. - pub struct int8x4_t(i8, i8, i8, i8); - /// ARM-specific 32-bit wide vector of four packed `u8`. - pub struct uint8x4_t(u8, u8, u8, u8); - /// ARM-specific 32-bit wide vector of two packed `i16`. - pub struct int16x2_t(i16, i16); - /// ARM-specific 32-bit wide vector of two packed `u16`. - pub struct uint16x2_t(u16, u16); -} - -macro_rules! dsp_call { - ($name:expr, $a:expr, $b:expr) => { - ::mem::transmute($name(::mem::transmute($a), ::mem::transmute($b))) - }; -} - -extern "C" { - #[link_name = "llvm.arm.qadd"] - fn arm_qadd(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.qadd16"] - fn arm_qadd16(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.qadd8"] - fn arm_qadd8(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.qasx"] - fn arm_qasx(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.qsax"] - fn arm_qsax(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.qsub"] - fn arm_qsub(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.qsub8"] - fn arm_qsub8(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.qsub16"] - fn arm_qsub16(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.sadd16"] - fn arm_sadd16(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.sadd8"] - fn arm_sadd8(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.sasx"] - fn arm_sasx(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.smlad"] - fn arm_smlad(a: i32, b: i32, c: i32) -> i32; - - #[link_name = "llvm.arm.smlsd"] - fn arm_smlsd(a: i32, b: i32, c: i32) -> i32; - - #[link_name = "llvm.arm.sel"] - fn arm_sel(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.shadd8"] - fn arm_shadd8(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.shadd16"] - fn arm_shadd16(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.shsub8"] - fn arm_shsub8(a: i32, b: i32) -> i32; 
- - #[link_name = "llvm.arm.shsub16"] - fn arm_shsub16(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.smuad"] - fn arm_smuad(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.smuadx"] - fn arm_smuadx(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.smusd"] - fn arm_smusd(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.smusdx"] - fn arm_smusdx(a: i32, b: i32) -> i32; - - #[link_name = "llvm.arm.usad8"] - fn arm_usad8(a: i32, b: i32) -> u32; -} - -/// Signed saturating addition -/// -/// Returns the 32-bit saturating signed equivalent of a + b. -#[inline] -#[cfg_attr(test, assert_instr(qadd))] -pub unsafe fn qadd(a: i32, b: i32) -> i32 { - arm_qadd(a, b) -} - -/// Signed saturating subtraction -/// -/// Returns the 32-bit saturating signed equivalent of a - b. -#[inline] -#[cfg_attr(test, assert_instr(qsub))] -pub unsafe fn qsub(a: i32, b: i32) -> i32 { - arm_qsub(a, b) -} - -/// Saturating four 8-bit integer additions -/// -/// Returns the 8-bit signed equivalent of -/// -/// res\[0\] = a\[0\] + b\[0\] -/// res\[1\] = a\[1\] + b\[1\] -/// res\[2\] = a\[2\] + b\[2\] -/// res\[3\] = a\[3\] + b\[3\] -#[inline] -#[cfg_attr(test, assert_instr(qadd8))] -pub unsafe fn qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { - dsp_call!(arm_qadd8, a, b) -} - -/// Saturating two 8-bit integer subtraction -/// -/// Returns the 8-bit signed equivalent of -/// -/// res\[0\] = a\[0\] - b\[0\] -/// res\[1\] = a\[1\] - b\[1\] -/// res\[2\] = a\[2\] - b\[2\] -/// res\[3\] = a\[3\] - b\[3\] -#[inline] -#[cfg_attr(test, assert_instr(qsub8))] -pub unsafe fn qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { - dsp_call!(arm_qsub8, a, b) -} - -/// Saturating two 16-bit integer subtraction -/// -/// Returns the 16-bit signed equivalent of -/// -/// res\[0\] = a\[0\] - b\[0\] -/// res\[1\] = a\[1\] - b\[1\] -#[inline] -#[cfg_attr(test, assert_instr(qsub16))] -pub unsafe fn qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { - dsp_call!(arm_qsub16, a, b) -} - -/// Saturating two 16-bit integer 
additions -/// -/// Returns the 16-bit signed equivalent of -/// -/// res\[0\] = a\[0\] + b\[0\] -/// res\[1\] = a\[1\] + b\[1\] -#[inline] -#[cfg_attr(test, assert_instr(qadd16))] -pub unsafe fn qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { - dsp_call!(arm_qadd16, a, b) -} - -/// Returns the 16-bit signed saturated equivalent of -/// -/// res\[0\] = a\[0\] - b\[1\] -/// res\[1\] = a\[1\] + b\[0\] -#[inline] -#[cfg_attr(test, assert_instr(qasx))] -pub unsafe fn qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { - dsp_call!(arm_qasx, a, b) -} - -/// Returns the 16-bit signed saturated equivalent of -/// -/// res\[0\] = a\[0\] + b\[1\] -/// res\[1\] = a\[1\] - b\[0\] -#[inline] -#[cfg_attr(test, assert_instr(qsax))] -pub unsafe fn qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t { - dsp_call!(arm_qsax, a, b) -} - -/// Returns the 16-bit signed saturated equivalent of -/// -/// res\[0\] = a\[0\] + b\[1\] -/// res\[1\] = a\[1\] + b\[0\] -/// -/// and the GE bits of the APSR are set. -#[inline] -#[cfg_attr(test, assert_instr(sadd16))] -pub unsafe fn sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { - dsp_call!(arm_sadd16, a, b) -} - -/// Returns the 8-bit signed saturated equivalent of -/// -/// res\[0\] = a\[0\] + b\[1\] -/// res\[1\] = a\[1\] + b\[0\] -/// res\[2\] = a\[2\] + b\[2\] -/// res\[3\] = a\[3\] + b\[3\] -/// -/// and the GE bits of the APSR are set. -#[inline] -#[cfg_attr(test, assert_instr(sadd8))] -pub unsafe fn sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { - dsp_call!(arm_sadd8, a, b) -} - -/// Dual 16-bit Signed Multiply with Addition of products -/// and 32-bit accumulation. 
-/// -/// Returns the 16-bit signed equivalent of -/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] + c -#[inline] -#[cfg_attr(test, assert_instr(smlad))] -pub unsafe fn smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { - arm_smlad(::mem::transmute(a), ::mem::transmute(b), c) -} - -/// Dual 16-bit Signed Multiply with Subtraction of products -/// and 32-bit accumulation and overflow detection. -/// -/// Returns the 16-bit signed equivalent of -/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] + c -#[inline] -#[cfg_attr(test, assert_instr(smlsd))] -pub unsafe fn smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { - arm_smlsd(::mem::transmute(a), ::mem::transmute(b), c) -} - -/// Returns the 16-bit signed equivalent of -/// -/// res\[0\] = a\[0\] - b\[1\] -/// res\[1\] = a\[1\] + b\[0\] -/// -/// and the GE bits of the APSR are set. -#[inline] -#[cfg_attr(test, assert_instr(sasx))] -pub unsafe fn sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { - dsp_call!(arm_sasx, a, b) -} - -/// Select bytes from each operand according to APSR GE flags -/// -/// Returns the equivalent of -/// -/// res\[0\] = GE\[0\] ? a\[0\] : b\[0\] -/// res\[1\] = GE\[1\] ? a\[1\] : b\[1\] -/// res\[2\] = GE\[2\] ? a\[2\] : b\[2\] -/// res\[3\] = GE\[3\] ? a\[3\] : b\[3\] -/// -/// where GE are bits of APSR -#[inline] -#[cfg_attr(test, assert_instr(sel))] -#[cfg(all(not(target_feature = "mclass")))] -pub unsafe fn sel(a: int8x4_t, b: int8x4_t) -> int8x4_t { - dsp_call!(arm_sel, a, b) -} - -/// Signed halving parallel byte-wise addition. -/// -/// Returns the 8-bit signed equivalent of -/// -/// res\[0\] = (a\[0\] + b\[0\]) / 2 -/// res\[1\] = (a\[1\] + b\[1\]) / 2 -/// res\[2\] = (a\[2\] + b\[2\]) / 2 -/// res\[3\] = (a\[3\] + b\[3\]) / 2 -#[inline] -#[cfg_attr(test, assert_instr(shadd8))] -pub unsafe fn shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { - dsp_call!(arm_shadd8, a, b) -} - -/// Signed halving parallel halfword-wise addition. 
-/// -/// Returns the 16-bit signed equivalent of -/// -/// res\[0\] = (a\[0\] + b\[0\]) / 2 -/// res\[1\] = (a\[1\] + b\[1\]) / 2 -#[inline] -#[cfg_attr(test, assert_instr(shadd16))] -pub unsafe fn shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { - dsp_call!(arm_shadd16, a, b) -} - -/// Signed halving parallel byte-wise subtraction. -/// -/// Returns the 8-bit signed equivalent of -/// -/// res\[0\] = (a\[0\] - b\[0\]) / 2 -/// res\[1\] = (a\[1\] - b\[1\]) / 2 -/// res\[2\] = (a\[2\] - b\[2\]) / 2 -/// res\[3\] = (a\[3\] - b\[3\]) / 2 -#[inline] -#[cfg_attr(test, assert_instr(shsub8))] -pub unsafe fn shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { - dsp_call!(arm_shsub8, a, b) -} - -/// Signed halving parallel halfword-wise subtraction. -/// -/// Returns the 16-bit signed equivalent of -/// -/// res\[0\] = (a\[0\] - b\[0\]) / 2 -/// res\[1\] = (a\[1\] - b\[1\]) / 2 -#[inline] -#[cfg_attr(test, assert_instr(shsub16))] -pub unsafe fn shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { - dsp_call!(arm_shsub16, a, b) -} - -/// Signed Dual Multiply Add. -/// -/// Returns the equivalent of -/// -/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] -/// -/// and sets the Q flag if overflow occurs on the addition. -#[inline] -#[cfg_attr(test, assert_instr(smuad))] -pub unsafe fn smuad(a: int16x2_t, b: int16x2_t) -> i32 { - arm_smuad(::mem::transmute(a), ::mem::transmute(b)) -} - -/// Signed Dual Multiply Add Reversed. -/// -/// Returns the equivalent of -/// -/// res = a\[0\] * b\[1\] + a\[1\] * b\[0\] -/// -/// and sets the Q flag if overflow occurs on the addition. -#[inline] -#[cfg_attr(test, assert_instr(smuadx))] -pub unsafe fn smuadx(a: int16x2_t, b: int16x2_t) -> i32 { - arm_smuadx(::mem::transmute(a), ::mem::transmute(b)) -} - -/// Signed Dual Multiply Subtract. -/// -/// Returns the equivalent of -/// -/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] -/// -/// and sets the Q flag if overflow occurs on the addition. 
-#[inline] -#[cfg_attr(test, assert_instr(smusd))] -pub unsafe fn smusd(a: int16x2_t, b: int16x2_t) -> i32 { - arm_smusd(::mem::transmute(a), ::mem::transmute(b)) -} - -/// Signed Dual Multiply Subtract Reversed. -/// -/// Returns the equivalent of -/// -/// res = a\[0\] * b\[1\] - a\[1\] * b\[0\] -/// -/// and sets the Q flag if overflow occurs on the addition. -#[inline] -#[cfg_attr(test, assert_instr(smusdx))] -pub unsafe fn smusdx(a: int16x2_t, b: int16x2_t) -> i32 { - arm_smusdx(::mem::transmute(a), ::mem::transmute(b)) -} - -/// Sum of 8-bit absolute differences. -/// -/// Returns the 8-bit unsigned equivalent of -/// -/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\ -/// (a\[2\] - b\[2\]) + (a\[3\] - b\[3\]) -#[inline] -#[cfg_attr(test, assert_instr(usad8))] -pub unsafe fn usad8(a: int8x4_t, b: int8x4_t) -> u32 { - arm_usad8(::mem::transmute(a), ::mem::transmute(b)) -} - -/// Sum of 8-bit absolute differences and constant. -/// -/// Returns the 8-bit unsigned equivalent of -/// -/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\ -/// (a\[2\] - b\[2\]) + (a\[3\] - b\[3\]) + c -#[inline] -#[cfg_attr(test, assert_instr(usad8))] -pub unsafe fn usad8a(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { - usad8(a, b) + c -} - -#[cfg(test)] -mod tests { - use core_arch::arm::*; - use core_arch::simd::*; - use std::mem; - use stdsimd_test::simd_test; - - #[test] - fn qadd() { - unsafe { - assert_eq!(dsp::qadd(-10, 60), 50); - assert_eq!(dsp::qadd(::std::i32::MAX, 10), ::std::i32::MAX); - assert_eq!(dsp::qadd(::std::i32::MIN, -10), ::std::i32::MIN); - } - } - - #[test] - fn qsub() { - unsafe { - assert_eq!(dsp::qsub(10, 60), -50); - assert_eq!(dsp::qsub(::std::i32::MAX, -10), ::std::i32::MAX); - assert_eq!(dsp::qsub(::std::i32::MIN, 10), ::std::i32::MIN); - } - } - - #[test] - fn qadd8() { - unsafe { - let a = i8x4::new(1, 2, 3, ::std::i8::MAX); - let b = i8x4::new(2, -1, 0, 1); - let c = i8x4::new(3, 1, 3, ::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::qadd8, 
a, b); - assert_eq!(r, c); - } - } - - #[test] - fn qsub8() { - unsafe { - let a = i8x4::new(1, 2, 3, ::std::i8::MIN); - let b = i8x4::new(2, -1, 0, 1); - let c = i8x4::new(-1, 3, 3, ::std::i8::MIN); - let r: i8x4 = dsp_call!(dsp::qsub8, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn qadd16() { - unsafe { - let a = i16x2::new(1, 2); - let b = i16x2::new(2, -1); - let c = i16x2::new(3, 1); - let r: i16x2 = dsp_call!(dsp::qadd16, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn qsub16() { - unsafe { - let a = i16x2::new(10, 20); - let b = i16x2::new(20, -10); - let c = i16x2::new(-10, 30); - let r: i16x2 = dsp_call!(dsp::qsub16, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn qasx() { - unsafe { - let a = i16x2::new(1, ::std::i16::MAX); - let b = i16x2::new(2, 2); - let c = i16x2::new(-1, ::std::i16::MAX); - let r: i16x2 = dsp_call!(dsp::qasx, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn qsax() { - unsafe { - let a = i16x2::new(1, ::std::i16::MAX); - let b = i16x2::new(2, 2); - let c = i16x2::new(3, ::std::i16::MAX - 2); - let r: i16x2 = dsp_call!(dsp::qsax, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn sadd16() { - unsafe { - let a = i16x2::new(1, ::std::i16::MAX); - let b = i16x2::new(2, 2); - let c = i16x2::new(3, -::std::i16::MAX); - let r: i16x2 = dsp_call!(dsp::sadd16, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn sadd8() { - unsafe { - let a = i8x4::new(1, 2, 3, ::std::i8::MAX); - let b = i8x4::new(4, 3, 2, 2); - let c = i8x4::new(5, 5, 5, -::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::sadd8, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn sasx() { - unsafe { - let a = i16x2::new(1, 2); - let b = i16x2::new(2, 1); - let c = i16x2::new(0, 4); - let r: i16x2 = dsp_call!(dsp::sasx, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn smlad() { - unsafe { - let a = i16x2::new(1, 2); - let b = i16x2::new(3, 4); - let r = dsp::smlad(::mem::transmute(a), ::mem::transmute(b), 10); - assert_eq!(r, (1 * 3) + (2 * 4) + 10); - } - } - - 
#[test] - fn smlsd() { - unsafe { - let a = i16x2::new(1, 2); - let b = i16x2::new(3, 4); - let r = dsp::smlsd(::mem::transmute(a), ::mem::transmute(b), 10); - assert_eq!(r, ((1 * 3) - (2 * 4)) + 10); - } - } - - #[test] - fn sel() { - unsafe { - let a = i8x4::new(1, 2, 3, ::std::i8::MAX); - let b = i8x4::new(4, 3, 2, 2); - // call sadd8() to set GE bits - dsp::sadd8(::mem::transmute(a), ::mem::transmute(b)); - let c = i8x4::new(1, 2, 3, ::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::sel, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn shadd8() { - unsafe { - let a = i8x4::new(1, 2, 3, 4); - let b = i8x4::new(5, 4, 3, 2); - let c = i8x4::new(3, 3, 3, 3); - let r: i8x4 = dsp_call!(dsp::shadd8, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn shadd16() { - unsafe { - let a = i16x2::new(1, 2); - let b = i16x2::new(5, 4); - let c = i16x2::new(3, 3); - let r: i16x2 = dsp_call!(dsp::shadd16, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn shsub8() { - unsafe { - let a = i8x4::new(1, 2, 3, 4); - let b = i8x4::new(5, 4, 3, 2); - let c = i8x4::new(-2, -1, 0, 1); - let r: i8x4 = dsp_call!(dsp::shsub8, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn shsub16() { - unsafe { - let a = i16x2::new(1, 2); - let b = i16x2::new(5, 4); - let c = i16x2::new(-2, -1); - let r: i16x2 = dsp_call!(dsp::shsub16, a, b); - assert_eq!(r, c); - } - } - - #[test] - fn smuad() { - unsafe { - let a = i16x2::new(1, 2); - let b = i16x2::new(5, 4); - let r = dsp::smuad(::mem::transmute(a), ::mem::transmute(b)); - assert_eq!(r, 13); - } - } - - #[test] - fn smuadx() { - unsafe { - let a = i16x2::new(1, 2); - let b = i16x2::new(5, 4); - let r = dsp::smuadx(::mem::transmute(a), ::mem::transmute(b)); - assert_eq!(r, 14); - } - } - - #[test] - fn smusd() { - unsafe { - let a = i16x2::new(1, 2); - let b = i16x2::new(5, 4); - let r = dsp::smusd(::mem::transmute(a), ::mem::transmute(b)); - assert_eq!(r, -3); - } - } - - #[test] - fn smusdx() { - unsafe { - let a = i16x2::new(1, 2); - let b = 
i16x2::new(5, 4); - let r = dsp::smusdx(::mem::transmute(a), ::mem::transmute(b)); - assert_eq!(r, -6); - } - } - - #[test] - fn usad8() { - unsafe { - let a = i8x4::new(1, 2, 3, 4); - let b = i8x4::new(4, 3, 2, 1); - let r = dsp::usad8(::mem::transmute(a), ::mem::transmute(b)); - assert_eq!(r, 8); - } - } - - #[test] - fn usad8a() { - unsafe { - let a = i8x4::new(1, 2, 3, 4); - let b = i8x4::new(4, 3, 2, 1); - let c = 10; - let r = dsp::usad8a(::mem::transmute(a), ::mem::transmute(b), c); - assert_eq!(r, 8 + c); - } - } -} diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index dd69b11457..e5b40c9bc7 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -19,12 +19,6 @@ mod v7; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] pub use self::v7::*; -// TODO move into the `acle::{dsp,simd32}` modules -#[cfg(any(all(target_feature = "v7", not(target_feature = "mclass")), dox))] -mod dsp; -#[cfg(any(all(target_feature = "v7", not(target_feature = "mclass")), dox))] -pub use self::dsp::*; - // NEON is supported on AArch64, and on ARM when built with the v7 and neon // features. Building ARM without neon produces incorrect codegen. 
#[cfg(any( From 4cb6d8c4cbb35eb64b6b75ba1041a6b2703b7de4 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 15 Feb 2019 14:47:49 +0100 Subject: [PATCH 21/31] acle/ex: fix raw pointer mutability --- crates/core_arch/src/acle/ex.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/core_arch/src/acle/ex.rs b/crates/core_arch/src/acle/ex.rs index c25e0dc37a..c3ebe0bfad 100644 --- a/crates/core_arch/src/acle/ex.rs +++ b/crates/core_arch/src/acle/ex.rs @@ -70,10 +70,10 @@ pub unsafe fn __ldrex(p: *const u32) -> u32 { #[cfg( target_feature = "v6k", // includes v7-M but excludes v6-M )] -pub unsafe fn __strexb(value: u32, addr: *const u8) -> u32 { +pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 { extern "C" { #[link_name = "llvm.arm.strex.p0i8"] - fn strex8(value: u32, addr: *const u8) -> u32; + fn strex8(value: u32, addr: *mut u8) -> u32; } strex8(value, addr) @@ -87,10 +87,10 @@ pub unsafe fn __strexb(value: u32, addr: *const u8) -> u32 { #[cfg( target_feature = "v6k", // includes v7-M but excludes v6-M )] -pub unsafe fn __strexh(value: u16, addr: *const u16) -> u32 { +pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 { extern "C" { #[link_name = "llvm.arm.strex.p0i16"] - fn strex16(value: u32, addr: *const u16) -> u32; + fn strex16(value: u32, addr: *mut u16) -> u32; } strex16(value as u32, addr) @@ -105,10 +105,10 @@ pub unsafe fn __strexh(value: u16, addr: *const u16) -> u32 { all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M all(target_feature = "v7", target_feature = "mclass"), // v7-M ))] -pub unsafe fn __strex(value: u32, addr: *const u32) -> u32 { +pub unsafe fn __strex(value: u32, addr: *mut u32) -> u32 { extern "C" { #[link_name = "llvm.arm.strex.p0i32"] - fn strex32(value: u32, addr: *const u32) -> u32; + fn strex32(value: u32, addr: *mut u32) -> u32; } strex32(value, addr) From f73e69fd4709fb8f57790e4e0f06e4b0fc410afd Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 
15 Feb 2019 14:50:09 +0100 Subject: [PATCH 22/31] cargo fmt --- crates/core_arch/src/acle/barrier/mod.rs | 10 ++-------- crates/core_arch/src/acle/mod.rs | 10 ++-------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/crates/core_arch/src/acle/barrier/mod.rs b/crates/core_arch/src/acle/barrier/mod.rs index 61686895f0..47ca55cc15 100644 --- a/crates/core_arch/src/acle/barrier/mod.rs +++ b/crates/core_arch/src/acle/barrier/mod.rs @@ -51,16 +51,10 @@ mod common; ))] pub use self::common::*; -#[cfg(any( - target_arch = "aarch64", - target_feature = "v7", -))] +#[cfg(any(target_arch = "aarch64", target_feature = "v7",))] mod not_mclass; -#[cfg(any( - target_arch = "aarch64", - target_feature = "v7", -))] +#[cfg(any(target_arch = "aarch64", target_feature = "v7",))] pub use self::not_mclass::*; #[cfg(target_arch = "aarch64")] diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 1354fe9fae..5f29decf5a 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -91,16 +91,10 @@ mod dsp; pub use self::dsp::*; // Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. 
SSAT) -#[cfg(all( - not(target_arch = "aarch64"), - target_feature = "v6", -))] +#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] mod sat; -#[cfg(all( - not(target_arch = "aarch64"), - target_feature = "v6", -))] +#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] pub use self::sat::*; // Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says From c2e2edd4c4345459dfdfeda3c55707a8688c950e Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 15 Feb 2019 14:53:37 +0100 Subject: [PATCH 23/31] add missing imports --- crates/core_arch/src/acle/dsp.rs | 3 +++ crates/core_arch/src/acle/simd32.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/crates/core_arch/src/acle/dsp.rs b/crates/core_arch/src/acle/dsp.rs index 3a71f2c469..b021a68042 100644 --- a/crates/core_arch/src/acle/dsp.rs +++ b/crates/core_arch/src/acle/dsp.rs @@ -20,6 +20,9 @@ //! - [ ] __smlawb //! - [ ] __smlawt +#[cfg(test)] +use stdsimd_test::assert_instr; + extern "C" { #[link_name = "llvm.arm.qadd"] fn arm_qadd(a: i32, b: i32) -> i32; diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/acle/simd32.rs index 420ce2b2d0..13eda17697 100644 --- a/crates/core_arch/src/acle/simd32.rs +++ b/crates/core_arch/src/acle/simd32.rs @@ -62,6 +62,9 @@ //! - [x] __smusd //! - [x] __smusdx +#[cfg(test)] +use stdsimd_test::assert_instr; + types! { /// ARM-specific 32-bit wide vector of four packed `i8`. 
pub struct int8x4_t(i8, i8, i8, i8); From f8ecff97b3eb9deefc4ad28266c0157962e0c085 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Fri, 15 Feb 2019 14:55:35 +0100 Subject: [PATCH 24/31] conditionally declare the dmb_dsb macro --- crates/core_arch/src/acle/barrier/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/core_arch/src/acle/barrier/mod.rs b/crates/core_arch/src/acle/barrier/mod.rs index 47ca55cc15..b3cbf44d27 100644 --- a/crates/core_arch/src/acle/barrier/mod.rs +++ b/crates/core_arch/src/acle/barrier/mod.rs @@ -19,6 +19,11 @@ mod cp15; pub use self::cp15::*; // Dedicated instructions +#[cfg(any( + target_arch = "aarch64", + target_feature = "v7", + target_feature = "mclass" +))] macro_rules! dmb_dsb { ($A:ident) => { impl super::super::sealed::Dmb for $A { From b4836bf5df6e0c2b62c13800b7a0e2e27e3cb817 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 18 Feb 2019 13:14:14 +0100 Subject: [PATCH 25/31] acle/{dsp,simd32}: add leading underscores to match ACLE spec --- crates/core_arch/src/acle/dsp.rs | 4 +-- crates/core_arch/src/acle/simd32.rs | 44 ++++++++++++++--------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/crates/core_arch/src/acle/dsp.rs b/crates/core_arch/src/acle/dsp.rs index b021a68042..f3fe4c437c 100644 --- a/crates/core_arch/src/acle/dsp.rs +++ b/crates/core_arch/src/acle/dsp.rs @@ -37,7 +37,7 @@ extern "C" { /// Returns the 32-bit saturating signed equivalent of a + b. #[inline] #[cfg_attr(test, assert_instr(qadd))] -pub unsafe fn qadd(a: i32, b: i32) -> i32 { +pub unsafe fn __qadd(a: i32, b: i32) -> i32 { arm_qadd(a, b) } @@ -46,6 +46,6 @@ pub unsafe fn qadd(a: i32, b: i32) -> i32 { /// Returns the 32-bit saturating signed equivalent of a - b. 
#[inline] #[cfg_attr(test, assert_instr(qsub))] -pub unsafe fn qsub(a: i32, b: i32) -> i32 { +pub unsafe fn __qsub(a: i32, b: i32) -> i32 { arm_qsub(a, b) } diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/acle/simd32.rs index 13eda17697..76dce39f57 100644 --- a/crates/core_arch/src/acle/simd32.rs +++ b/crates/core_arch/src/acle/simd32.rs @@ -157,7 +157,7 @@ extern "C" { /// res\[3\] = a\[3\] + b\[3\] #[inline] #[cfg_attr(test, assert_instr(qadd8))] -pub unsafe fn qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_qadd8, a, b) } @@ -171,7 +171,7 @@ pub unsafe fn qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res\[3\] = a\[3\] - b\[3\] #[inline] #[cfg_attr(test, assert_instr(qsub8))] -pub unsafe fn qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_qsub8, a, b) } @@ -183,7 +183,7 @@ pub unsafe fn qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res\[1\] = a\[1\] - b\[1\] #[inline] #[cfg_attr(test, assert_instr(qsub16))] -pub unsafe fn qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qsub16, a, b) } @@ -195,7 +195,7 @@ pub unsafe fn qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// res\[1\] = a\[1\] + b\[1\] #[inline] #[cfg_attr(test, assert_instr(qadd16))] -pub unsafe fn qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qadd16, a, b) } @@ -205,7 +205,7 @@ pub unsafe fn qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// res\[1\] = a\[1\] + b\[0\] #[inline] #[cfg_attr(test, assert_instr(qasx))] -pub unsafe fn qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qasx, a, b) } @@ -215,7 +215,7 @@ pub unsafe fn qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// 
res\[1\] = a\[1\] - b\[0\] #[inline] #[cfg_attr(test, assert_instr(qsax))] -pub unsafe fn qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qsax, a, b) } @@ -227,7 +227,7 @@ pub unsafe fn qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// and the GE bits of the APSR are set. #[inline] #[cfg_attr(test, assert_instr(sadd16))] -pub unsafe fn sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_sadd16, a, b) } @@ -241,7 +241,7 @@ pub unsafe fn sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// and the GE bits of the APSR are set. #[inline] #[cfg_attr(test, assert_instr(sadd8))] -pub unsafe fn sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_sadd8, a, b) } @@ -252,7 +252,7 @@ pub unsafe fn sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] + c #[inline] #[cfg_attr(test, assert_instr(smlad))] -pub unsafe fn smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { +pub unsafe fn __smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { arm_smlad(::mem::transmute(a), ::mem::transmute(b), c) } @@ -263,7 +263,7 @@ pub unsafe fn smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { /// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] + c #[inline] #[cfg_attr(test, assert_instr(smlsd))] -pub unsafe fn smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { +pub unsafe fn __smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { arm_smlsd(::mem::transmute(a), ::mem::transmute(b), c) } @@ -275,7 +275,7 @@ pub unsafe fn smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { /// and the GE bits of the APSR are set. 
#[inline] #[cfg_attr(test, assert_instr(sasx))] -pub unsafe fn sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_sasx, a, b) } @@ -291,7 +291,7 @@ pub unsafe fn sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// where GE are bits of APSR #[inline] #[cfg_attr(test, assert_instr(sel))] -pub unsafe fn sel(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __sel(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_sel, a, b) } @@ -305,7 +305,7 @@ pub unsafe fn sel(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res\[3\] = (a\[3\] + b\[3\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shadd8))] -pub unsafe fn shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_shadd8, a, b) } @@ -317,7 +317,7 @@ pub unsafe fn shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res\[1\] = (a\[1\] + b\[1\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shadd16))] -pub unsafe fn shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_shadd16, a, b) } @@ -331,7 +331,7 @@ pub unsafe fn shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// res\[3\] = (a\[3\] - b\[3\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shsub8))] -pub unsafe fn shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { +pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_shsub8, a, b) } @@ -343,7 +343,7 @@ pub unsafe fn shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res\[1\] = (a\[1\] - b\[1\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shsub16))] -pub unsafe fn shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { +pub unsafe fn __shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_shsub16, a, b) } @@ -356,7 +356,7 @@ pub unsafe fn shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { /// and sets the Q flag if overflow occurs on the addition. 
#[inline] #[cfg_attr(test, assert_instr(smuad))] -pub unsafe fn smuad(a: int16x2_t, b: int16x2_t) -> i32 { +pub unsafe fn __smuad(a: int16x2_t, b: int16x2_t) -> i32 { arm_smuad(::mem::transmute(a), ::mem::transmute(b)) } @@ -369,7 +369,7 @@ pub unsafe fn smuad(a: int16x2_t, b: int16x2_t) -> i32 { /// and sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smuadx))] -pub unsafe fn smuadx(a: int16x2_t, b: int16x2_t) -> i32 { +pub unsafe fn __smuadx(a: int16x2_t, b: int16x2_t) -> i32 { arm_smuadx(::mem::transmute(a), ::mem::transmute(b)) } @@ -382,7 +382,7 @@ pub unsafe fn smuadx(a: int16x2_t, b: int16x2_t) -> i32 { /// and sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smusd))] -pub unsafe fn smusd(a: int16x2_t, b: int16x2_t) -> i32 { +pub unsafe fn __smusd(a: int16x2_t, b: int16x2_t) -> i32 { arm_smusd(::mem::transmute(a), ::mem::transmute(b)) } @@ -395,7 +395,7 @@ pub unsafe fn smusd(a: int16x2_t, b: int16x2_t) -> i32 { /// and sets the Q flag if overflow occurs on the addition. 
#[inline] #[cfg_attr(test, assert_instr(smusdx))] -pub unsafe fn smusdx(a: int16x2_t, b: int16x2_t) -> i32 { +pub unsafe fn __smusdx(a: int16x2_t, b: int16x2_t) -> i32 { arm_smusdx(::mem::transmute(a), ::mem::transmute(b)) } @@ -407,7 +407,7 @@ pub unsafe fn smusdx(a: int16x2_t, b: int16x2_t) -> i32 { /// (a\[2\] - b\[2\]) + (a\[3\] - b\[3\]) #[inline] #[cfg_attr(test, assert_instr(usad8))] -pub unsafe fn usad8(a: int8x4_t, b: int8x4_t) -> u32 { +pub unsafe fn __usad8(a: int8x4_t, b: int8x4_t) -> u32 { arm_usad8(::mem::transmute(a), ::mem::transmute(b)) } @@ -419,7 +419,7 @@ pub unsafe fn usad8(a: int8x4_t, b: int8x4_t) -> u32 { /// (a\[2\] - b\[2\]) + (a\[3\] - b\[3\]) + c #[inline] #[cfg_attr(test, assert_instr(usad8))] -pub unsafe fn usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { +pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { usad8(a, b) + c } From 660ead1a2b260c64cd22a6771df0f9bc9823baf4 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 18 Feb 2019 15:03:13 +0100 Subject: [PATCH 26/31] fix CI --- crates/core_arch/src/acle/simd32.rs | 58 ++++++++++++++--------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/acle/simd32.rs index 76dce39f57..58b47304c4 100644 --- a/crates/core_arch/src/acle/simd32.rs +++ b/crates/core_arch/src/acle/simd32.rs @@ -433,18 +433,18 @@ mod tests { #[test] fn qadd() { unsafe { - assert_eq!(dsp::qadd(-10, 60), 50); - assert_eq!(dsp::qadd(::std::i32::MAX, 10), ::std::i32::MAX); - assert_eq!(dsp::qadd(::std::i32::MIN, -10), ::std::i32::MIN); + assert_eq!(dsp::__qadd(-10, 60), 50); + assert_eq!(dsp::__qadd(::std::i32::MAX, 10), ::std::i32::MAX); + assert_eq!(dsp::__qadd(::std::i32::MIN, -10), ::std::i32::MIN); } } #[test] fn qsub() { unsafe { - assert_eq!(dsp::qsub(10, 60), -50); - assert_eq!(dsp::qsub(::std::i32::MAX, -10), ::std::i32::MAX); - assert_eq!(dsp::qsub(::std::i32::MIN, 10), ::std::i32::MIN); + assert_eq!(dsp::__qsub(10, 
60), -50); + assert_eq!(dsp::__qsub(::std::i32::MAX, -10), ::std::i32::MAX); + assert_eq!(dsp::__qsub(::std::i32::MIN, 10), ::std::i32::MIN); } } @@ -454,7 +454,7 @@ mod tests { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = i8x4::new(2, -1, 0, 1); let c = i8x4::new(3, 1, 3, ::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::qadd8, a, b); + let r: i8x4 = dsp_call!(dsp::__qadd8, a, b); assert_eq!(r, c); } } @@ -465,7 +465,7 @@ mod tests { let a = i8x4::new(1, 2, 3, ::std::i8::MIN); let b = i8x4::new(2, -1, 0, 1); let c = i8x4::new(-1, 3, 3, ::std::i8::MIN); - let r: i8x4 = dsp_call!(dsp::qsub8, a, b); + let r: i8x4 = dsp_call!(dsp::__qsub8, a, b); assert_eq!(r, c); } } @@ -476,7 +476,7 @@ mod tests { let a = i16x2::new(1, 2); let b = i16x2::new(2, -1); let c = i16x2::new(3, 1); - let r: i16x2 = dsp_call!(dsp::qadd16, a, b); + let r: i16x2 = dsp_call!(dsp::__qadd16, a, b); assert_eq!(r, c); } } @@ -487,7 +487,7 @@ mod tests { let a = i16x2::new(10, 20); let b = i16x2::new(20, -10); let c = i16x2::new(-10, 30); - let r: i16x2 = dsp_call!(dsp::qsub16, a, b); + let r: i16x2 = dsp_call!(dsp::__qsub16, a, b); assert_eq!(r, c); } } @@ -498,7 +498,7 @@ mod tests { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(-1, ::std::i16::MAX); - let r: i16x2 = dsp_call!(dsp::qasx, a, b); + let r: i16x2 = dsp_call!(dsp::__qasx, a, b); assert_eq!(r, c); } } @@ -509,7 +509,7 @@ mod tests { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(3, ::std::i16::MAX - 2); - let r: i16x2 = dsp_call!(dsp::qsax, a, b); + let r: i16x2 = dsp_call!(dsp::__qsax, a, b); assert_eq!(r, c); } } @@ -520,7 +520,7 @@ mod tests { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(3, -::std::i16::MAX); - let r: i16x2 = dsp_call!(dsp::sadd16, a, b); + let r: i16x2 = dsp_call!(dsp::__sadd16, a, b); assert_eq!(r, c); } } @@ -531,7 +531,7 @@ mod tests { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = 
i8x4::new(4, 3, 2, 2); let c = i8x4::new(5, 5, 5, -::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::sadd8, a, b); + let r: i8x4 = dsp_call!(dsp::__sadd8, a, b); assert_eq!(r, c); } } @@ -542,7 +542,7 @@ mod tests { let a = i16x2::new(1, 2); let b = i16x2::new(2, 1); let c = i16x2::new(0, 4); - let r: i16x2 = dsp_call!(dsp::sasx, a, b); + let r: i16x2 = dsp_call!(dsp::__sasx, a, b); assert_eq!(r, c); } } @@ -552,7 +552,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(3, 4); - let r = dsp::smlad(::mem::transmute(a), ::mem::transmute(b), 10); + let r = dsp::__smlad(::mem::transmute(a), ::mem::transmute(b), 10); assert_eq!(r, (1 * 3) + (2 * 4) + 10); } } @@ -562,7 +562,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(3, 4); - let r = dsp::smlsd(::mem::transmute(a), ::mem::transmute(b), 10); + let r = dsp::__smlsd(::mem::transmute(a), ::mem::transmute(b), 10); assert_eq!(r, ((1 * 3) - (2 * 4)) + 10); } } @@ -573,9 +573,9 @@ mod tests { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = i8x4::new(4, 3, 2, 2); // call sadd8() to set GE bits - dsp::sadd8(::mem::transmute(a), ::mem::transmute(b)); + dsp::__sadd8(::mem::transmute(a), ::mem::transmute(b)); let c = i8x4::new(1, 2, 3, ::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::sel, a, b); + let r: i8x4 = dsp_call!(dsp::__sel, a, b); assert_eq!(r, c); } } @@ -586,7 +586,7 @@ mod tests { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(5, 4, 3, 2); let c = i8x4::new(3, 3, 3, 3); - let r: i8x4 = dsp_call!(dsp::shadd8, a, b); + let r: i8x4 = dsp_call!(dsp::__shadd8, a, b); assert_eq!(r, c); } } @@ -597,7 +597,7 @@ mod tests { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let c = i16x2::new(3, 3); - let r: i16x2 = dsp_call!(dsp::shadd16, a, b); + let r: i16x2 = dsp_call!(dsp::__shadd16, a, b); assert_eq!(r, c); } } @@ -608,7 +608,7 @@ mod tests { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(5, 4, 3, 2); let c = i8x4::new(-2, -1, 0, 1); - let r: i8x4 = dsp_call!(dsp::shsub8, 
a, b); + let r: i8x4 = dsp_call!(dsp::__shsub8, a, b); assert_eq!(r, c); } } @@ -619,7 +619,7 @@ mod tests { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let c = i16x2::new(-2, -1); - let r: i16x2 = dsp_call!(dsp::shsub16, a, b); + let r: i16x2 = dsp_call!(dsp::__shsub16, a, b); assert_eq!(r, c); } } @@ -629,7 +629,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::smuad(::mem::transmute(a), ::mem::transmute(b)); + let r = dsp::__smuad(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, 13); } } @@ -639,7 +639,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::smuadx(::mem::transmute(a), ::mem::transmute(b)); + let r = dsp::__smuadx(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, 14); } } @@ -649,7 +649,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::smusd(::mem::transmute(a), ::mem::transmute(b)); + let r = dsp::__smusd(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, -3); } } @@ -659,7 +659,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::smusdx(::mem::transmute(a), ::mem::transmute(b)); + let r = dsp::__smusdx(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, -6); } } @@ -669,7 +669,7 @@ mod tests { unsafe { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(4, 3, 2, 1); - let r = dsp::usad8(::mem::transmute(a), ::mem::transmute(b)); + let r = dsp::__usad8(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, 8); } } @@ -680,7 +680,7 @@ mod tests { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(4, 3, 2, 1); let c = 10; - let r = dsp::usad8a(::mem::transmute(a), ::mem::transmute(b), c); + let r = dsp::__usad8a(::mem::transmute(a), ::mem::transmute(b), c); assert_eq!(r, 8 + c); } } From 608fdd73cf1aeec1173d6a6378e86a44b47ce1a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Miku=C5=82a?= Date: Mon, 18 Feb 2019 16:45:02 +0100 Subject: [PATCH 27/31] 
Update crates/core_arch/src/acle/simd32.rs Co-Authored-By: japaric --- crates/core_arch/src/acle/simd32.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/acle/simd32.rs index 58b47304c4..ae704ef175 100644 --- a/crates/core_arch/src/acle/simd32.rs +++ b/crates/core_arch/src/acle/simd32.rs @@ -420,7 +420,7 @@ pub unsafe fn __usad8(a: int8x4_t, b: int8x4_t) -> u32 { #[inline] #[cfg_attr(test, assert_instr(usad8))] pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { - usad8(a, b) + c + __usad8(a, b) + c } #[cfg(test)] From 3a30a06636ed086b545a5d3468f99a185dbbb38c Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 18 Feb 2019 17:39:29 +0100 Subject: [PATCH 28/31] acle/ex: CLREX requires v6k --- crates/core_arch/src/acle/ex.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/core_arch/src/acle/ex.rs b/crates/core_arch/src/acle/ex.rs index c3ebe0bfad..0426c65186 100644 --- a/crates/core_arch/src/acle/ex.rs +++ b/crates/core_arch/src/acle/ex.rs @@ -3,8 +3,10 @@ /// Removes the exclusive lock created by LDREX // Supported: v6, v6K, v7-M, v7-A, v7-R // Not supported: v5, v6-M +// NOTE: there's no dedicated CLREX instruction in v6 ( Date: Mon, 18 Feb 2019 18:01:37 +0100 Subject: [PATCH 29/31] acle/{dsp,simd32}: fix unit tests --- crates/core_arch/src/acle/dsp.rs | 25 +++++++++++ crates/core_arch/src/acle/simd32.rs | 67 +++++++++++------------------ 2 files changed, 49 insertions(+), 43 deletions(-) diff --git a/crates/core_arch/src/acle/dsp.rs b/crates/core_arch/src/acle/dsp.rs index f3fe4c437c..e929e98e40 100644 --- a/crates/core_arch/src/acle/dsp.rs +++ b/crates/core_arch/src/acle/dsp.rs @@ -49,3 +49,28 @@ pub unsafe fn __qadd(a: i32, b: i32) -> i32 { pub unsafe fn __qsub(a: i32, b: i32) -> i32 { arm_qsub(a, b) } + +#[cfg(test)] +mod tests { + use core_arch::arm::*; + use std::mem; + use stdsimd_test::simd_test; + + #[test] + fn qadd() { + unsafe { + 
assert_eq!(super::__qadd(-10, 60), 50); + assert_eq!(super::__qadd(::std::i32::MAX, 10), ::std::i32::MAX); + assert_eq!(super::__qadd(::std::i32::MIN, -10), ::std::i32::MIN); + } + } + + #[test] + fn qsub() { + unsafe { + assert_eq!(super::__qsub(10, 60), -50); + assert_eq!(super::__qsub(::std::i32::MAX, -10), ::std::i32::MAX); + assert_eq!(super::__qsub(::std::i32::MIN, 10), ::std::i32::MIN); + } + } +} diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/acle/simd32.rs index ae704ef175..eae0f0b830 100644 --- a/crates/core_arch/src/acle/simd32.rs +++ b/crates/core_arch/src/acle/simd32.rs @@ -425,36 +425,17 @@ pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { #[cfg(test)] mod tests { - use core_arch::arm::*; - use core_arch::simd::*; + use core_arch::simd::{i8x4, i16x2}; use std::mem; use stdsimd_test::simd_test; - #[test] - fn qadd() { - unsafe { - assert_eq!(dsp::__qadd(-10, 60), 50); - assert_eq!(dsp::__qadd(::std::i32::MAX, 10), ::std::i32::MAX); - assert_eq!(dsp::__qadd(::std::i32::MIN, -10), ::std::i32::MIN); - } - } - - #[test] - fn qsub() { - unsafe { - assert_eq!(dsp::__qsub(10, 60), -50); - assert_eq!(dsp::__qsub(::std::i32::MAX, -10), ::std::i32::MAX); - assert_eq!(dsp::__qsub(::std::i32::MIN, 10), ::std::i32::MIN); - } - } - #[test] fn qadd8() { unsafe { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = i8x4::new(2, -1, 0, 1); let c = i8x4::new(3, 1, 3, ::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::__qadd8, a, b); + let r: i8x4 = dsp_call!(super::__qadd8, a, b); assert_eq!(r, c); } } @@ -465,7 +446,7 @@ mod tests { let a = i8x4::new(1, 2, 3, ::std::i8::MIN); let b = i8x4::new(2, -1, 0, 1); let c = i8x4::new(-1, 3, 3, ::std::i8::MIN); - let r: i8x4 = dsp_call!(dsp::__qsub8, a, b); + let r: i8x4 = dsp_call!(super::__qsub8, a, b); assert_eq!(r, c); } } @@ -476,7 +457,7 @@ mod tests { let a = i16x2::new(1, 2); let b = i16x2::new(2, -1); let c = i16x2::new(3, 1); - let r: i16x2 = dsp_call!(dsp::__qadd16, a, b); + let r: 
i16x2 = dsp_call!(super::__qadd16, a, b); assert_eq!(r, c); } } @@ -487,7 +468,7 @@ mod tests { let a = i16x2::new(10, 20); let b = i16x2::new(20, -10); let c = i16x2::new(-10, 30); - let r: i16x2 = dsp_call!(dsp::__qsub16, a, b); + let r: i16x2 = dsp_call!(super::__qsub16, a, b); assert_eq!(r, c); } } @@ -498,7 +479,7 @@ mod tests { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(-1, ::std::i16::MAX); - let r: i16x2 = dsp_call!(dsp::__qasx, a, b); + let r: i16x2 = dsp_call!(super::__qasx, a, b); assert_eq!(r, c); } } @@ -509,7 +490,7 @@ mod tests { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(3, ::std::i16::MAX - 2); - let r: i16x2 = dsp_call!(dsp::__qsax, a, b); + let r: i16x2 = dsp_call!(super::__qsax, a, b); assert_eq!(r, c); } } @@ -520,7 +501,7 @@ mod tests { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(3, -::std::i16::MAX); - let r: i16x2 = dsp_call!(dsp::__sadd16, a, b); + let r: i16x2 = dsp_call!(super::__sadd16, a, b); assert_eq!(r, c); } } @@ -531,7 +512,7 @@ mod tests { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = i8x4::new(4, 3, 2, 2); let c = i8x4::new(5, 5, 5, -::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::__sadd8, a, b); + let r: i8x4 = dsp_call!(super::__sadd8, a, b); assert_eq!(r, c); } } @@ -542,7 +523,7 @@ mod tests { let a = i16x2::new(1, 2); let b = i16x2::new(2, 1); let c = i16x2::new(0, 4); - let r: i16x2 = dsp_call!(dsp::__sasx, a, b); + let r: i16x2 = dsp_call!(super::__sasx, a, b); assert_eq!(r, c); } } @@ -552,7 +533,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(3, 4); - let r = dsp::__smlad(::mem::transmute(a), ::mem::transmute(b), 10); + let r = super::__smlad(::mem::transmute(a), ::mem::transmute(b), 10); assert_eq!(r, (1 * 3) + (2 * 4) + 10); } } @@ -562,7 +543,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(3, 4); - let r = dsp::__smlsd(::mem::transmute(a), 
::mem::transmute(b), 10); + let r = super::__smlsd(::mem::transmute(a), ::mem::transmute(b), 10); assert_eq!(r, ((1 * 3) - (2 * 4)) + 10); } } @@ -573,9 +554,9 @@ mod tests { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = i8x4::new(4, 3, 2, 2); // call sadd8() to set GE bits - dsp::__sadd8(::mem::transmute(a), ::mem::transmute(b)); + super::__sadd8(::mem::transmute(a), ::mem::transmute(b)); let c = i8x4::new(1, 2, 3, ::std::i8::MAX); - let r: i8x4 = dsp_call!(dsp::__sel, a, b); + let r: i8x4 = dsp_call!(super::__sel, a, b); assert_eq!(r, c); } } @@ -586,7 +567,7 @@ mod tests { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(5, 4, 3, 2); let c = i8x4::new(3, 3, 3, 3); - let r: i8x4 = dsp_call!(dsp::__shadd8, a, b); + let r: i8x4 = dsp_call!(super::__shadd8, a, b); assert_eq!(r, c); } } @@ -597,7 +578,7 @@ mod tests { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let c = i16x2::new(3, 3); - let r: i16x2 = dsp_call!(dsp::__shadd16, a, b); + let r: i16x2 = dsp_call!(super::__shadd16, a, b); assert_eq!(r, c); } } @@ -608,7 +589,7 @@ mod tests { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(5, 4, 3, 2); let c = i8x4::new(-2, -1, 0, 1); - let r: i8x4 = dsp_call!(dsp::__shsub8, a, b); + let r: i8x4 = dsp_call!(super::__shsub8, a, b); assert_eq!(r, c); } } @@ -619,7 +600,7 @@ mod tests { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let c = i16x2::new(-2, -1); - let r: i16x2 = dsp_call!(dsp::__shsub16, a, b); + let r: i16x2 = dsp_call!(super::__shsub16, a, b); assert_eq!(r, c); } } @@ -629,7 +610,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::__smuad(::mem::transmute(a), ::mem::transmute(b)); + let r = super::__smuad(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, 13); } } @@ -639,7 +620,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::__smuadx(::mem::transmute(a), ::mem::transmute(b)); + let r = super::__smuadx(::mem::transmute(a), ::mem::transmute(b)); 
assert_eq!(r, 14); } } @@ -649,7 +630,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::__smusd(::mem::transmute(a), ::mem::transmute(b)); + let r = super::__smusd(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, -3); } } @@ -659,7 +640,7 @@ mod tests { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); - let r = dsp::__smusdx(::mem::transmute(a), ::mem::transmute(b)); + let r = super::__smusdx(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, -6); } } @@ -669,7 +650,7 @@ mod tests { unsafe { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(4, 3, 2, 1); - let r = dsp::__usad8(::mem::transmute(a), ::mem::transmute(b)); + let r = super::__usad8(::mem::transmute(a), ::mem::transmute(b)); assert_eq!(r, 8); } } @@ -680,7 +661,7 @@ mod tests { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(4, 3, 2, 1); let c = 10; - let r = dsp::__usad8a(::mem::transmute(a), ::mem::transmute(b), c); + let r = super::__usada8(::mem::transmute(a), ::mem::transmute(b), c); assert_eq!(r, 8 + c); } } From 9d1bb44414fa8c9d6d76cea748f45182ab6753ba Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 18 Feb 2019 18:21:08 +0100 Subject: [PATCH 30/31] assert_instr: bump instruction limit for simd32 --- crates/stdsimd-test/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/stdsimd-test/src/lib.rs b/crates/stdsimd-test/src/lib.rs index dec44401d9..66ee9dd894 100644 --- a/crates/stdsimd-test/src/lib.rs +++ b/crates/stdsimd-test/src/lib.rs @@ -153,6 +153,10 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // in some cases exceed the limit. "cvtpi2ps" => 25, + // core_arch/src/acle/simd32 + "usad8" => 27, + "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" => 29, + // Original limit was 20 instructions, but ARM DSP Intrinsics // are exactly 20 instructions long. 
So bump // the limit to 22 instead of adding here a From a36e20e22d7d34192c9a58408884800dfeeb0b48 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Mon, 18 Feb 2019 18:59:27 +0100 Subject: [PATCH 31/31] cargo fmt --- crates/core_arch/src/acle/simd32.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/acle/simd32.rs index eae0f0b830..a259f90d2c 100644 --- a/crates/core_arch/src/acle/simd32.rs +++ b/crates/core_arch/src/acle/simd32.rs @@ -425,7 +425,7 @@ pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { #[cfg(test)] mod tests { - use core_arch::simd::{i8x4, i16x2}; + use core_arch::simd::{i16x2, i8x4}; use std::mem; use stdsimd_test::simd_test;