Skip to content

Commit

Permalink
Use __kuser_cmpxchg64 for 64-bit atomics on pre-v6 ARM Linux/Android
Browse files Browse the repository at this point in the history
  • Loading branch information
taiki-e committed Mar 25, 2023
1 parent f5bde1f commit d97ddb7
Show file tree
Hide file tree
Showing 13 changed files with 642 additions and 86 deletions.
37 changes: 37 additions & 0 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,23 @@ aarch64_linux_test_task:
# FEAT_LSE2 is tested on aarch64 macOS VM.
- RUSTFLAGS="$RUSTFLAGS -C target-feature=+lse" RUSTDOCFLAGS="$RUSTDOCFLAGS -C target-feature=+lse" ./tools/test.sh -vv

# Cirrus CI task that runs ./tools/test.sh for the armv5te-unknown-linux-gnueabi
# target using an `arm_container`. The rendered task name is
# "test (armv5te-unknown-linux-gnueabi)".
arm_linux_test_task:
name: test ($TARGET)
env:
TARGET: armv5te-unknown-linux-gnueabi
arm_container:
image: rust:latest
setup_script:
- set -ex
- lscpu
# Install nightly together with the rust-src component and make it the default toolchain.
- rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly
# Register armel as a foreign dpkg architecture so the `libc6:armel` package below can be installed.
- dpkg --add-architecture armel
# Install the arm-linux-gnueabi GCC and the armel libc packages (with apt retries enabled).
- apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-arm-linux-gnueabi libc6-dev-armel-cross libc6:armel
test_script:
- set -ex
# Have cargo link this target with the arm-linux-gnueabi GCC installed in setup_script.
- export CARGO_TARGET_ARMV5TE_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc
- ./tools/test.sh --target "$TARGET" -vv

aarch64_macos_test_task:
name: test ($TARGET)
env:
Expand Down Expand Up @@ -112,6 +129,26 @@ aarch64_linux_valgrind_task:
# - RUSTFLAGS="${RUSTFLAGS} -C target-feature=-outline-atomics --cfg portable_atomic_no_outline_atomics" cargo bench -vv --manifest-path bench/Cargo.toml
# - RUSTFLAGS="${RUSTFLAGS} -C target-feature=+lse" cargo bench -vv --manifest-path bench/Cargo.toml

# arm_linux_bench_task:
# name: bench ($TARGET)
# env:
# TARGET: armv5te-unknown-linux-gnueabi
# arm_container:
# image: rust:latest
# cpu: 4
# memory: 12G
# setup_script:
# - set -ex
# - lscpu
# - rustup toolchain add nightly --no-self-update --component rust-src && rustup default nightly
# - rustup target add "$TARGET"
# - dpkg --add-architecture armel
# - apt-get -o Acquire::Retries=10 -qq update && apt-get -o Acquire::Retries=10 -o Dpkg::Use-Pty=0 install -y --no-install-recommends gcc-arm-linux-gnueabi libc6-dev-armel-cross libc6:armel
# test_script:
# - set -ex
# - export CARGO_TARGET_ARMV5TE_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc
# - RUSTFLAGS="${RUSTFLAGS}" cargo bench --target "$TARGET" -vv --manifest-path bench/Cargo.toml

# aarch64_macos_bench_task:
# name: bench ($TARGET)
# env:
Expand Down
2 changes: 2 additions & 0 deletions .github/.cspell/project-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ adde
alcgr
algr
armasm
armel
autogen
auxv
auxvec
Expand Down Expand Up @@ -56,6 +57,7 @@ includepath
isel
ishld
isync
kuser
ldaxp
ldxp
lghi
Expand Down
1 change: 1 addition & 0 deletions .github/bors.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ status = [
"test (aarch64-unknown-linux-gnu)",
"test (aarch64-unknown-linux-gnu, glibc 2.17)",
"test (aarch64-unknown-linux-musl)",
"test (armv5te-unknown-linux-gnueabi)",
"test (aarch64-apple-darwin)",
"valgrind (aarch64-unknown-linux-gnu)",
]
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,18 @@ jobs:
target: aarch64-linux-android
- rust: nightly
target: arm-unknown-linux-gnueabi
- rust: stable
target: armv5te-unknown-linux-gnueabi
- rust: nightly
target: armv5te-unknown-linux-gnueabi
- rust: nightly
target: armv7-unknown-linux-gnueabi
- rust: nightly
target: armv7-unknown-linux-gnueabihf
- rust: nightly
target: armv5te-unknown-linux-musleabi
- rust: nightly
target: arm-linux-androideabi
- rust: nightly
target: i586-unknown-linux-gnu
- rust: nightly
Expand Down
3 changes: 3 additions & 0 deletions Cross.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@ passthrough = [
[target.aarch64-linux-android]
# Workaround https://github.com/cross-rs/cross/issues/1128 / https://github.com/rust-lang/rust/issues/103673
image = "ghcr.io/cross-rs/aarch64-linux-android:edge"
[target.arm-linux-androideabi]
# Workaround https://github.com/cross-rs/cross/issues/1128 / https://github.com/rust-lang/rust/issues/103673
image = "ghcr.io/cross-rs/arm-linux-androideabi:edge"
218 changes: 139 additions & 79 deletions bench/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ mod arch;
#[allow(dead_code, unused_imports)]
#[path = "../../src/imp/atomic128/s390x.rs"]
mod arch;
#[cfg(all(target_arch = "arm", any(target_os = "linux", target_os = "android")))]
#[allow(dead_code, unused_imports)]
#[path = "../../src/imp/arm_linux.rs"]
mod arch;
#[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
#[allow(dead_code, unused_imports)]
#[path = "../../src/imp/atomic128/intrinsics.rs"]
Expand All @@ -60,59 +64,69 @@ trait AtomicInt<T: Copy>: Sized + Send + Sync {
fn load(&self) -> T;
fn store(&self, val: T);
fn swap(&self, val: T) -> T;
fn compare_exchange(&self, old: u128, new: u128) -> u128;
fn compare_exchange(&self, old: T, new: T) -> T;
fn fetch_add(&self, val: T) -> T;
}
macro_rules! impl_atomic_u128 {
($atomic_u128:path) => {
impl AtomicInt<u128> for $atomic_u128 {
fn new(v: u128) -> Self {
// Implements `AtomicInt<$int_type>` for `$atomic_type` by forwarding each trait
// method to the method of the same name that takes explicit `Ordering` arguments:
// `load` uses `Acquire`, `store` uses `Release`, `swap` and `fetch_add` use
// `AcqRel`, and `compare_exchange` uses `AcqRel` on success / `Acquire` on failure.
// `compare_exchange` collapses the `Result` and returns its payload either way.
//
// NOTE(review): this listing comes from a diff view, so the pre-change `u128`
// signatures appear next to their `$int_type` replacements — confirm against
// the real file before editing.
macro_rules! impl_atomic {
($atomic_type:path, $int_type:ident) => {
impl AtomicInt<$int_type> for $atomic_type {
#[inline]
fn new(v: $int_type) -> Self {
Self::new(v)
}
fn load(&self) -> u128 {
#[inline]
fn load(&self) -> $int_type {
self.load(Ordering::Acquire)
}
fn store(&self, val: u128) {
#[inline]
fn store(&self, val: $int_type) {
self.store(val, Ordering::Release);
}
fn swap(&self, val: u128) -> u128 {
#[inline]
fn swap(&self, val: $int_type) -> $int_type {
self.swap(val, Ordering::AcqRel)
}
fn compare_exchange(&self, old: u128, new: u128) -> u128 {
#[inline]
fn compare_exchange(&self, old: $int_type, new: $int_type) -> $int_type {
self.compare_exchange(old, new, Ordering::AcqRel, Ordering::Acquire)
.unwrap_or_else(|x| x)
}
fn fetch_add(&self, val: u128) -> u128 {
#[inline]
fn fetch_add(&self, val: $int_type) -> $int_type {
self.fetch_add(val, Ordering::AcqRel)
}
}
};
}
#[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
impl_atomic_u128!(intrinsics::AtomicU128);
impl_atomic_u128!(arch::AtomicU128);
impl_atomic_u128!(seqlock_fallback::AtomicU128);
impl_atomic_u128!(spinlock_fallback::AtomicU128);
impl_atomic_u128!(atomic::Atomic<u128>);
impl AtomicInt<u128> for crossbeam_utils::atomic::AtomicCell<u128> {
fn new(v: u128) -> Self {
Self::new(v)
}
fn load(&self) -> u128 {
self.load()
}
fn store(&self, val: u128) {
self.store(val);
}
fn swap(&self, val: u128) -> u128 {
self.swap(val)
}
fn compare_exchange(&self, old: u128, new: u128) -> u128 {
self.compare_exchange(old, new).unwrap_or_else(|x| x)
}
fn fetch_add(&self, val: u128) -> u128 {
self.fetch_add(val)
}
// Implements `AtomicInt<$int>` for `$atomic`, a type whose own operations take
// no `Ordering` argument. Each trait method forwards to the method of the same
// name on the type; `compare_exchange` flattens the returned `Result` into its
// payload.
macro_rules! impl_atomic_no_order {
    ($atomic:path, $int:ident) => {
        impl AtomicInt<$int> for $atomic {
            #[inline]
            fn new(initial: $int) -> Self {
                Self::new(initial)
            }

            #[inline]
            fn load(&self) -> $int {
                self.load()
            }

            #[inline]
            fn store(&self, value: $int) {
                self.store(value);
            }

            #[inline]
            fn swap(&self, value: $int) -> $int {
                self.swap(value)
            }

            #[inline]
            fn compare_exchange(&self, expected: $int, replacement: $int) -> $int {
                // Whichever variant comes back, hand its payload to the caller unchanged.
                match self.compare_exchange(expected, replacement) {
                    Ok(seen) | Err(seen) => seen,
                }
            }

            #[inline]
            fn fetch_add(&self, delta: $int) -> $int {
                self.fetch_add(delta)
            }
        }
    };
}

fn bench_concurrent_load<A: AtomicInt<T>, T: Copy + From<u32>>() -> A {
Expand Down Expand Up @@ -264,75 +278,121 @@ fn bench_concurrent_fetch_add<A: AtomicInt<T>, T: Copy + From<u32>>() -> A {
}

// Generates a `pub fn $name(c: &mut Criterion)` that opens a benchmark group
// named after the function and registers one benchmark per operation of the
// `AtomicInt<$int_type>` implementation on `$atomic_type`. Benchmark ids are the
// integer type name followed by an operation suffix such as `_load`.
// The single-threaded benchmarks build a fresh atomic holding 1 and call one
// trait method per iteration; the `_concurrent_*` benchmarks hand off to the
// `bench_concurrent_*` helper functions.
//
// NOTE(review): this listing comes from a diff view, so the pre-change
// `u128`-specific lines appear next to their `$int_type` replacements —
// confirm against the real file before editing.
macro_rules! benches {
($name:ident, $atomic_u128:path) => {
fn $name(c: &mut Criterion) {
type A = $atomic_u128;
($name:ident, $atomic_type:path, $int_type:ident) => {
pub fn $name(c: &mut Criterion) {
type A = $atomic_type;
let mut g = c.benchmark_group(stringify!($name));
g.bench_function("u128_load", |b| {
g.bench_function(concat!(stringify!($int_type), "_load"), |b| {
let a = A::new(black_box(1));
b.iter(|| AtomicInt::<u128>::load(&a));
b.iter(|| AtomicInt::<$int_type>::load(&a));
});
g.bench_function("u128_store", |b| {
g.bench_function(concat!(stringify!($int_type), "_store"), |b| {
let a = A::new(black_box(1));
b.iter(|| AtomicInt::<u128>::store(&a, black_box(2)));
b.iter(|| AtomicInt::<$int_type>::store(&a, black_box(2)));
black_box(a);
});
g.bench_function("u128_swap", |b| {
g.bench_function(concat!(stringify!($int_type), "_swap"), |b| {
let a = A::new(black_box(1));
b.iter(|| AtomicInt::<u128>::swap(&a, black_box(2)));
b.iter(|| AtomicInt::<$int_type>::swap(&a, black_box(2)));
black_box(a);
});
g.bench_function("u128_compare_exchange_success", |b| {
g.bench_function(concat!(stringify!($int_type), "_compare_exchange_success"), |b| {
let a = A::new(black_box(1));
b.iter(|| AtomicInt::<u128>::compare_exchange(&a, black_box(1), black_box(2)));
b.iter(|| AtomicInt::<$int_type>::compare_exchange(&a, black_box(1), black_box(2)));
black_box(a);
});
g.bench_function("u128_compare_exchange_fail", |b| {
g.bench_function(concat!(stringify!($int_type), "_compare_exchange_fail"), |b| {
let a = A::new(black_box(1));
b.iter(|| AtomicInt::<u128>::compare_exchange(&a, black_box(2), black_box(3)));
b.iter(|| AtomicInt::<$int_type>::compare_exchange(&a, black_box(2), black_box(3)));
black_box(a);
});
g.bench_function("u128_fetch_add", |b| {
g.bench_function(concat!(stringify!($int_type), "_fetch_add"), |b| {
let a = A::new(black_box(1));
b.iter(|| AtomicInt::<u128>::fetch_add(&a, black_box(2)));
b.iter(|| AtomicInt::<$int_type>::fetch_add(&a, black_box(2)));
black_box(a);
});
g.bench_function("u128_concurrent_load", |b| {
b.iter(bench_concurrent_load::<A, u128>);
g.bench_function(concat!(stringify!($int_type), "_concurrent_load"), |b| {
b.iter(bench_concurrent_load::<A, $int_type>);
});
g.bench_function("u128_concurrent_load_store", |b| {
b.iter(bench_concurrent_load_store::<A, u128>);
g.bench_function(concat!(stringify!($int_type), "_concurrent_load_store"), |b| {
b.iter(bench_concurrent_load_store::<A, $int_type>);
});
g.bench_function("u128_concurrent_store", |b| {
b.iter(bench_concurrent_store::<A, u128>);
g.bench_function(concat!(stringify!($int_type), "_concurrent_store"), |b| {
b.iter(bench_concurrent_store::<A, $int_type>);
});
g.bench_function("u128_concurrent_swap", |b| {
b.iter(bench_concurrent_swap::<A, u128>);
g.bench_function(concat!(stringify!($int_type), "_concurrent_swap"), |b| {
b.iter(bench_concurrent_swap::<A, $int_type>);
});
g.bench_function("u128_concurrent_store_swap", |b| {
b.iter(bench_concurrent_store_swap::<A, u128>);
g.bench_function(concat!(stringify!($int_type), "_concurrent_store_swap"), |b| {
b.iter(bench_concurrent_store_swap::<A, $int_type>);
});
g.bench_function("u128_concurrent_fetch_add", |b| {
b.iter(bench_concurrent_fetch_add::<A, u128>);
g.bench_function(concat!(stringify!($int_type), "_concurrent_fetch_add"), |b| {
b.iter(bench_concurrent_fetch_add::<A, $int_type>);
});
}
};
}
#[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
benches!(bench_portable_atomic_intrinsics, intrinsics::AtomicU128);
benches!(bench_portable_atomic_arch, arch::AtomicU128);
benches!(bench_portable_atomic_seqlock_fallback, seqlock_fallback::AtomicU128);
benches!(bench_portable_atomic_spinlock_fallback, spinlock_fallback::AtomicU128);
benches!(bench_atomic_cell, crossbeam_utils::atomic::AtomicCell<u128>);
benches!(bench_atomic_rs, atomic::Atomic<u128>);
// 128-bit benchmark set: wires every `AtomicU128` implementation (plus the
// `atomic` and `crossbeam_utils` comparison types) into `AtomicInt<u128>` and
// exposes them as the criterion group `bench::benches`.
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "aarch64",
    target_arch = "powerpc64",
    target_arch = "s390x",
))]
mod bench {
    use super::*;

    // The intrinsics-based implementation is only benchmarked on x86_64 and
    // little-endian aarch64.
    #[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
    impl_atomic!(intrinsics::AtomicU128, u128);
    impl_atomic!(arch::AtomicU128, u128);
    impl_atomic!(seqlock_fallback::AtomicU128, u128);
    impl_atomic!(spinlock_fallback::AtomicU128, u128);
    impl_atomic!(atomic::Atomic<u128>, u128);
    impl_atomic_no_order!(crossbeam_utils::atomic::AtomicCell<u128>, u128);

    #[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
    benches!(bench_portable_atomic_intrinsics, intrinsics::AtomicU128, u128);
    benches!(bench_portable_atomic_arch, arch::AtomicU128, u128);
    benches!(bench_portable_atomic_seqlock_fallback, seqlock_fallback::AtomicU128, u128);
    benches!(bench_portable_atomic_spinlock_fallback, spinlock_fallback::AtomicU128, u128);
    benches!(bench_atomic_cell, crossbeam_utils::atomic::AtomicCell<u128>, u128);
    benches!(bench_atomic_rs, atomic::Atomic<u128>, u128);

    // `bench_portable_atomic_intrinsics` is only generated under the cfg used
    // above, so the group that names it is gated identically. Individual
    // entries of a `criterion_group!` list cannot carry a `#[cfg]`, hence the
    // two complete invocations.
    #[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
    criterion_group!(
        benches,
        bench_portable_atomic_arch,
        bench_portable_atomic_intrinsics,
        bench_portable_atomic_seqlock_fallback,
        bench_portable_atomic_spinlock_fallback,
        bench_atomic_cell,
        bench_atomic_rs
    );
    // Every other target this module is enabled for: same group without the
    // intrinsics benchmark, which does not exist there.
    #[cfg(not(any(
        target_arch = "x86_64",
        all(target_arch = "aarch64", target_endian = "little"),
    )))]
    criterion_group!(
        benches,
        bench_portable_atomic_arch,
        bench_portable_atomic_seqlock_fallback,
        bench_portable_atomic_spinlock_fallback,
        bench_atomic_cell,
        bench_atomic_rs
    );
}
// 64-bit benchmark set for Arm Linux/Android: wires every `AtomicU64`
// implementation (plus the `atomic` and `crossbeam_utils` comparison types)
// into `AtomicInt<u64>` and exposes them as the criterion group
// `bench::benches`.
#[cfg(all(target_arch = "arm", any(target_os = "linux", target_os = "android")))]
mod bench {
    use super::*;

    // No per-item `#[cfg]` is needed here: the module-level gate already
    // restricts everything inside to Arm Linux/Android.
    impl_atomic!(arch::AtomicU64, u64);
    impl_atomic!(seqlock_fallback::AtomicU64, u64);
    impl_atomic!(spinlock_fallback::AtomicU64, u64);
    impl_atomic!(atomic::Atomic<u64>, u64);
    impl_atomic_no_order!(crossbeam_utils::atomic::AtomicCell<u64>, u64);

    benches!(bench_portable_atomic_arch, arch::AtomicU64, u64);
    benches!(bench_portable_atomic_seqlock_fallback, seqlock_fallback::AtomicU64, u64);
    benches!(bench_portable_atomic_spinlock_fallback, spinlock_fallback::AtomicU64, u64);
    benches!(bench_atomic_cell, crossbeam_utils::atomic::AtomicCell<u64>, u64);
    benches!(bench_atomic_rs, atomic::Atomic<u64>, u64);

    criterion_group!(
        benches,
        bench_portable_atomic_arch,
        bench_portable_atomic_seqlock_fallback,
        bench_portable_atomic_spinlock_fallback,
        bench_atomic_cell,
        bench_atomic_rs
    );
}

criterion_group!(
benches,
bench_portable_atomic_arch,
bench_portable_atomic_intrinsics,
bench_portable_atomic_seqlock_fallback,
bench_portable_atomic_spinlock_fallback,
bench_atomic_cell,
bench_atomic_rs
);
criterion_main!(benches);
criterion_main!(bench::benches);
Loading

0 comments on commit d97ddb7

Please sign in to comment.