Skip to content

Commit

Permalink
arm64ec: Support 128-bit atomics
Browse files Browse the repository at this point in the history
  • Loading branch information
taiki-e committed Sep 21, 2024
1 parent 69f4ccf commit ed71daa
Show file tree
Hide file tree
Showing 16 changed files with 74 additions and 29 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ portable-atomic = { version = "1.3", default-features = false, features = ["requ

## 128-bit atomics support

Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), AArch64 (Rust 1.59+), riscv64 (Rust 1.82+), powerpc64 (nightly only), and s390x (nightly only), otherwise the fallback implementation is used.
Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), AArch64 (Rust 1.59+), riscv64 (Rust 1.82+), powerpc64 (nightly only), s390x (nightly only), and Arm64EC (nightly only), otherwise the fallback implementation is used.

On x86_64, even if `cmpxchg16b` is not available at compile-time (note: `cmpxchg16b` target feature is enabled by default only on Apple and Windows (except Windows 7) targets), run-time detection checks whether `cmpxchg16b` is available. If `cmpxchg16b` is not available at either compile-time or run-time detection, the fallback implementation is used. See also [`portable_atomic_no_outline_atomics`](#optional-cfg-no-outline-atomics) cfg.

Expand Down Expand Up @@ -170,7 +170,7 @@ RUSTFLAGS="--cfg portable_atomic_no_outline_atomics" cargo ...
If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE/FEAT_LSE2 (AArch64).

Note:
- Dynamic detection is currently only enabled in Rust 1.59+ for x86_64 and AArch64, nightly only for powerpc64 (disabled by default), otherwise it works the same as when this cfg is set.
- Dynamic detection is currently only enabled in Rust 1.59+ for x86_64 and AArch64, in Rust 1.82+ for riscv64 (disabled by default), and on nightly only for powerpc64 (disabled by default) and Arm64EC; otherwise, it works the same as when this cfg is set.
- If the required target features are enabled at compile-time, the atomic operations are inlined.
- This is compatible with no-std (as with all features except `std`).
- On some targets, run-time detection is disabled by default mainly for compatibility with older versions of operating systems or incomplete build environments, and can be enabled by `--cfg portable_atomic_outline_atomics`. (When both cfg are enabled, `*_no_*` cfg is preferred.)
Expand Down
16 changes: 13 additions & 3 deletions bench/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ mod tests {
#[allow(dead_code, unused_imports)]
#[path = "../../src/imp/mod.rs"]
mod imp;
#[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
#[cfg(any(
target_arch = "x86_64",
all(any(target_arch = "aarch64", target_arch = "arm64ec"), target_endian = "little")
))]
#[allow(dead_code, unused_imports)]
#[path = "../../src/imp/atomic128/intrinsics.rs"]
mod intrinsics;
Expand Down Expand Up @@ -343,21 +346,28 @@ macro_rules! benches {
#[cfg(any(
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "s390x",
))]
mod bench {
use super::*;

#[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
#[cfg(any(
target_arch = "x86_64",
all(any(target_arch = "aarch64", target_arch = "arm64ec"), target_endian = "little")
))]
impl_atomic!(intrinsics::AtomicU128, u128);
impl_atomic!(imp::AtomicU128, u128);
impl_atomic!(seqlock_fallback::AtomicU128, u128);
impl_atomic!(spinlock_fallback::AtomicU128, u128);
// impl_atomic!(atomic::Atomic<u128>, u128);
// impl_atomic_no_order!(crossbeam_utils::atomic::AtomicCell<u128>, u128);

#[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
#[cfg(any(
target_arch = "x86_64",
all(any(target_arch = "aarch64", target_arch = "arm64ec"), target_endian = "little")
))]
benches!(bench_portable_atomic_intrinsics, intrinsics::AtomicU128, u128);
benches!(bench_portable_atomic_arch, imp::AtomicU128, u128);
benches!(bench_portable_atomic_seqlock_fallback, seqlock_fallback::AtomicU128, u128);
Expand Down
5 changes: 3 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ fn main() {
// https://github.com/rust-lang/rust/pull/111331 merged in Rust 1.71 (nightly-2023-05-09).
if !no_asm
&& (target_arch == "powerpc64" && version.probe(60, 2022, 2, 12)
|| target_arch == "s390x" && version.probe(71, 2023, 5, 8))
|| target_arch == "s390x" && version.probe(71, 2023, 5, 8)
|| target_arch == "arm64ec")
&& is_allowed_feature("asm_experimental_arch")
{
println!("cargo:rustc-cfg=portable_atomic_unstable_asm_experimental_arch");
Expand Down Expand Up @@ -230,7 +231,7 @@ fn main() {
target_feature_fallback("cmpxchg16b", has_cmpxchg16b);
}
}
"aarch64" => {
"aarch64" | "arm64ec" => {
// For Miri and ThreadSanitizer.
// https://github.com/rust-lang/rust/pull/97423 merged in Rust 1.64 (nightly-2022-06-30).
if version.nightly && version.probe(64, 2022, 6, 29) {
Expand Down
2 changes: 2 additions & 0 deletions src/cfgs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ mod atomic_64_macros {
target_arch = "aarch64",
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
),
all(target_arch = "arm64ec", portable_atomic_unstable_asm_experimental_arch),
all(
target_arch = "x86_64",
not(all(
Expand Down Expand Up @@ -362,6 +363,7 @@ mod atomic_128_macros {
target_arch = "aarch64",
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
),
all(target_arch = "arm64ec", portable_atomic_unstable_asm_experimental_arch),
all(
target_arch = "x86_64",
not(all(
Expand Down
2 changes: 1 addition & 1 deletion src/imp/atomic128/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Here is the table of targets that support 128-bit atomics and the instructions u
| target_arch | load | store | CAS | RMW | note |
| ----------- | ---- | ----- | --- | --- | ---- |
| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel, AMD, or Zhaoxin CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. <br> Requires rustc 1.59+ |
| aarch64 | ldxp/stxp or casp or ldp/ldiapp | ldxp/stxp or casp or stp/stilp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, ldiapp/stilp requires lse2 and rcpc3 target features, swpp/ldclrp/ldsetp requires lse128 target feature. <br> Both compile-time and run-time detection are supported. <br> Requires rustc 1.59+ |
| aarch64/arm64ec | ldxp/stxp or casp or ldp/ldiapp | ldxp/stxp or casp or stp/stilp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, ldiapp/stilp requires lse2 and rcpc3 target features, swpp/ldclrp/ldsetp requires lse128 target feature. <br> Both compile-time and run-time detection are supported. <br> Requires rustc 1.59+ (aarch64) / nightly (arm64ec) |
| riscv64 | amocas.q | amocas.q | amocas.q | amocas.q | Experimental because LLVM marks the corresponding target feature as experimental. Requires experimental-zacas target feature. Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires rustc 1.82+ (LLVM 19+) |
| powerpc64 | lq | stq | lqarx/stqcx. | lqarx/stqcx. | Requires target-cpu pwr8+ (powerpc64le is pwr8 by default). Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires nightly |
| s390x | lpq | stpq | cdsg | cdsg | Requires nightly |
Expand Down
6 changes: 3 additions & 3 deletions src/imp/atomic128/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ See README.md for details.
*/

// AArch64
#[cfg(all(
target_arch = "aarch64",
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
#[cfg(any(
all(target_arch = "aarch64", any(not(portable_atomic_no_asm), portable_atomic_unstable_asm)),
all(target_arch = "arm64ec", portable_atomic_unstable_asm_experimental_arch)
))]
// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly.
#[cfg_attr(
Expand Down
4 changes: 2 additions & 2 deletions src/imp/core_atomic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ macro_rules! atomic_int {
{
#[cfg(any(
all(
target_arch = "aarch64",
any(target_arch = "aarch64", target_arch = "arm64ec"),
any(target_feature = "lse", portable_atomic_target_feature = "lse"),
),
all(
Expand Down Expand Up @@ -315,7 +315,7 @@ macro_rules! atomic_int {
{
#[cfg(any(
all(
target_arch = "aarch64",
any(target_arch = "aarch64", target_arch = "arm64ec"),
any(target_feature = "lse", portable_atomic_target_feature = "lse"),
),
all(
Expand Down
2 changes: 1 addition & 1 deletion src/imp/detect/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Here is the table of targets that support run-time CPU feature detection and the
| aarch64 | openbsd | sysctl | all | Enabled by default |
| aarch64 | macos/ios/tvos/watchos/visionos | sysctlbyname | all | Currently only used in tests (see [aarch64_apple.rs](aarch64_apple.rs)). |
| aarch64 | illumos | getisax | lse, lse2 | Disabled by default |
| aarch64 | windows | IsProcessorFeaturePresent | lse | Enabled by default |
| aarch64/arm64ec | windows | IsProcessorFeaturePresent | lse | Enabled by default |
| aarch64 | fuchsia | zx_system_get_features | lse | Enabled by default |
| riscv32 | linux | riscv_hwprobe | all | Disabled by default |
| riscv64 | linux | riscv_hwprobe | all | Disabled by default |
Expand Down
4 changes: 2 additions & 2 deletions src/imp/detect/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ macro_rules! flags {
};
}

#[cfg(target_arch = "aarch64")]
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
flags! {
// FEAT_LSE, Large System Extensions
// https://developer.arm.com/documentation/109697/0100/Feature-descriptions/The-Armv8-1-architecture-extension
Expand Down Expand Up @@ -250,7 +250,7 @@ mod tests_common {
let _ = stdout.write_all(features.as_bytes());
}

#[cfg(target_arch = "aarch64")]
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
#[test]
#[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)]
fn test_detect() {
Expand Down
8 changes: 7 additions & 1 deletion src/imp/fallback/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@ use core::ops;
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/powerpc/include/asm/cache.h#L26
#[cfg_attr(
any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64"),
any(
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "powerpc64",
),
repr(align(128))
)]
// arm, mips, mips64, sparc, and hexagon have 32-byte cache line size.
Expand Down Expand Up @@ -70,6 +75,7 @@ use core::ops;
not(any(
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "arm",
target_arch = "mips",
Expand Down
9 changes: 6 additions & 3 deletions src/imp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ mod atomic64;
// 128-bit atomic implementations on 64-bit architectures
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
Expand All @@ -96,6 +97,7 @@ mod atomic128;
target_arch = "aarch64",
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
),
all(target_arch = "arm64ec", portable_atomic_unstable_asm_experimental_arch),
all(
target_arch = "x86_64",
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
Expand Down Expand Up @@ -296,6 +298,7 @@ items! {
target_arch = "aarch64",
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
),
all(target_arch = "arm64ec", portable_atomic_unstable_asm_experimental_arch),
all(
target_arch = "x86_64",
not(all(
Expand Down Expand Up @@ -398,9 +401,9 @@ pub(crate) use self::atomic64::riscv32::{AtomicI64, AtomicU64};

// 128-bit atomics (platform-specific)
// AArch64
#[cfg(all(
target_arch = "aarch64",
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
#[cfg(any(
all(target_arch = "aarch64", any(not(portable_atomic_no_asm), portable_atomic_unstable_asm)),
all(target_arch = "arm64ec", portable_atomic_unstable_asm_experimental_arch)
))]
pub(crate) use self::atomic128::aarch64::{AtomicI128, AtomicU128};
// x86_64 & (cmpxchg16b | outline-atomics)
Expand Down
10 changes: 7 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ portable-atomic = { version = "1.3", default-features = false, features = ["requ
## 128-bit atomics support
Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), AArch64 (Rust 1.59+), riscv64 (Rust 1.82+), powerpc64 (nightly only), and s390x (nightly only), otherwise the fallback implementation is used.
Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), AArch64 (Rust 1.59+), riscv64 (Rust 1.82+), powerpc64 (nightly only), s390x (nightly only), and Arm64EC (nightly only), otherwise the fallback implementation is used.
On x86_64, even if `cmpxchg16b` is not available at compile-time (note: `cmpxchg16b` target feature is enabled by default only on Apple and Windows (except Windows 7) targets), run-time detection checks whether `cmpxchg16b` is available. If `cmpxchg16b` is not available at either compile-time or run-time detection, the fallback implementation is used. See also [`portable_atomic_no_outline_atomics`](#optional-cfg-no-outline-atomics) cfg.
Expand Down Expand Up @@ -164,7 +164,7 @@ RUSTFLAGS="--cfg portable_atomic_no_outline_atomics" cargo ...
If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE/FEAT_LSE2 (AArch64).
Note:
- Dynamic detection is currently only enabled in Rust 1.59+ for x86_64 and AArch64, nightly only for powerpc64 (disabled by default), otherwise it works the same as when this cfg is set.
- Dynamic detection is currently only enabled in Rust 1.59+ for x86_64 and AArch64, in Rust 1.82+ for riscv64 (disabled by default), and on nightly only for powerpc64 (disabled by default) and Arm64EC; otherwise, it works the same as when this cfg is set.
- If the required target features are enabled at compile-time, the atomic operations are inlined.
- This is compatible with no-std (as with all features except `std`).
- On some targets, run-time detection is disabled by default mainly for compatibility with older versions of operating systems or incomplete build environments, and can be enabled by `--cfg portable_atomic_outline_atomics`. (When both cfg are enabled, `*_no_*` cfg is preferred.)
Expand Down Expand Up @@ -215,7 +215,7 @@ RUSTFLAGS="--cfg portable_atomic_no_outline_atomics" cargo ...
#![allow(clippy::inline_always)]
// asm_experimental_arch
// AVR, MSP430, and Xtensa are tier 3 platforms and require nightly anyway.
// On tier 2 platforms (powerpc64 and s390x), we use cfg set by build script to
// On tier 2 platforms (arm64ec, powerpc64, and s390x), we use cfg set by build script to
// determine whether this feature is available or not.
#![cfg_attr(
all(
Expand All @@ -224,6 +224,7 @@ RUSTFLAGS="--cfg portable_atomic_no_outline_atomics" cargo ...
target_arch = "avr",
target_arch = "msp430",
all(target_arch = "xtensa", portable_atomic_unsafe_assume_single_core),
all(target_arch = "arm64ec", portable_atomic_unstable_asm_experimental_arch),
all(target_arch = "powerpc64", portable_atomic_unstable_asm_experimental_arch),
all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch),
),
Expand Down Expand Up @@ -277,6 +278,7 @@ RUSTFLAGS="--cfg portable_atomic_no_outline_atomics" cargo ...
all(
any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
Expand All @@ -289,6 +291,7 @@ RUSTFLAGS="--cfg portable_atomic_no_outline_atomics" cargo ...
all(
any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
Expand Down Expand Up @@ -358,6 +361,7 @@ compile_error!(
#[cfg(portable_atomic_no_outline_atomics)]
#[cfg(not(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "arm",
target_arch = "powerpc64",
target_arch = "riscv32",
Expand Down
1 change: 1 addition & 0 deletions src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ fn test_is_lock_free() {
assert!(!AtomicU128::is_lock_free());
} else if cfg!(any(
target_arch = "aarch64",
all(target_arch = "arm64ec", portable_atomic_unstable_asm_experimental_arch),
all(
target_arch = "x86_64",
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
Expand Down
17 changes: 15 additions & 2 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ macro_rules! doc_comment {
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "riscv32",
target_arch = "riscv64",
Expand Down Expand Up @@ -82,6 +83,7 @@ macro_rules! ifunc {
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "riscv32",
target_arch = "riscv64",
Expand Down Expand Up @@ -347,6 +349,7 @@ pub(crate) fn zero_extend64_ptr(v: *mut ()) -> core::mem::MaybeUninit<u64> {
#[allow(dead_code)]
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
Expand Down Expand Up @@ -379,11 +382,21 @@ pub(crate) union U64 {
#[repr(C)]
pub(crate) struct Pair<T: Copy> {
// little endian order
#[cfg(any(target_endian = "little", target_arch = "aarch64", target_arch = "arm"))]
#[cfg(any(
target_endian = "little",
target_arch = "aarch64",
target_arch = "arm",
target_arch = "arm64ec",
))]
pub(crate) lo: T,
pub(crate) hi: T,
// big endian order
#[cfg(not(any(target_endian = "little", target_arch = "aarch64", target_arch = "arm")))]
#[cfg(not(any(
target_endian = "little",
target_arch = "aarch64",
target_arch = "arm",
target_arch = "arm64ec",
)))]
pub(crate) lo: T,
}

Expand Down
6 changes: 3 additions & 3 deletions tests/helper/src/cpuinfo.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT

#![cfg(any(target_arch = "aarch64", target_arch = "powerpc64"))]
#![cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "powerpc64"))]

use std::{boxed::Box, path::Path, vec::Vec};

Expand All @@ -17,7 +17,7 @@ type Result<T, E = Box<dyn std::error::Error + Send + Sync>> = std::result::Resu
/// This is used for testing to ensure that the result of the CPU feature
/// detection we are using matches the information we get from the other
/// approaches.
#[cfg(target_arch = "aarch64")]
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
#[derive(Debug, Clone, Copy)]
pub struct ProcCpuinfo {
pub lse: bool,
Expand All @@ -33,7 +33,7 @@ pub struct ProcCpuinfo {
pub power10: Option<bool>,
}
impl ProcCpuinfo {
#[cfg(target_arch = "aarch64")]
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
pub fn new() -> Result<Self> {
use std::process::Command;
if cfg!(any(target_os = "linux", target_os = "android", target_os = "netbsd")) {
Expand Down
Loading

0 comments on commit ed71daa

Please sign in to comment.