Skip to content

Commit

Permalink
riscv: Support run-time detection for Zacas extension on pre-1.82 rustc
Browse files Browse the repository at this point in the history
`.insn <value>` directive requires LLVM 19, so use `.4byte` directive.
llvm/llvm-project@2a086dc
  • Loading branch information
taiki-e committed Oct 26, 2024
1 parent 4ab2cd8 commit be4707d
Show file tree
Hide file tree
Showing 13 changed files with 52 additions and 44 deletions.
27 changes: 25 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -258,12 +258,32 @@ jobs:
# target: powerpc64le-unknown-linux-gnu
- rust: nightly
target: powerpc64le-unknown-linux-gnu
- rust: nightly-2022-08-12 # Rust 1.65, LLVM 14
target: riscv32gc-unknown-linux-gnu
# - rust: nightly-2023-03-25 # Rust 1.70, LLVM 15
# target: riscv32gc-unknown-linux-gnu
# - rust: nightly-2023-08-08 # Rust 1.73, LLVM 16
# target: riscv32gc-unknown-linux-gnu
# - rust: nightly-2024-02-13 # Rust 1.78, LLVM 17
# target: riscv32gc-unknown-linux-gnu
# - rust: nightly-2024-07-31 # Rust 1.82, LLVM 18
# target: riscv32gc-unknown-linux-gnu
- rust: nightly
target: riscv32gc-unknown-linux-gnu
- rust: nightly-2021-08-21 # Rust 1.56, LLVM 12
target: riscv64gc-unknown-linux-gnu
- rust: '1.59' # LLVM 13
target: riscv64gc-unknown-linux-gnu
# - rust: '1.64' # LLVM 14
# target: riscv64gc-unknown-linux-gnu
# - rust: '1.69' # LLVM 15
# target: riscv64gc-unknown-linux-gnu
# - rust: '1.72' # LLVM 16
# target: riscv64gc-unknown-linux-gnu
# - rust: '1.77' # LLVM 17
# target: riscv64gc-unknown-linux-gnu
# - rust: '1.81' # LLVM 18
# target: riscv64gc-unknown-linux-gnu
- rust: stable
target: riscv64gc-unknown-linux-gnu
- rust: nightly
Expand Down Expand Up @@ -569,8 +589,11 @@ jobs:
fail-fast: false
matrix:
rust:
- '1.64'
- '1.72'
- '1.64' # LLVM 14
# - '1.69' # LLVM 15
- '1.72' # LLVM 16
# - '1.77' # LLVM 17
# - '1.81' # LLVM 18
- stable
- nightly
runs-on: ubuntu-latest
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ portable-atomic = { version = "1.3", default-features = false, features = ["requ

## 128-bit atomics support

Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), AArch64 (Rust 1.59+), riscv64 (Rust 1.82+), powerpc64 (nightly only), s390x (nightly only), and Arm64EC (nightly only), otherwise the fallback implementation is used.
Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), AArch64 (Rust 1.59+), riscv64 (Rust 1.59+), powerpc64 (nightly only), s390x (nightly only), and Arm64EC (nightly only), otherwise the fallback implementation is used.

On x86_64, even if `cmpxchg16b` is not available at compile-time (note: `cmpxchg16b` target feature is enabled by default only on Apple and Windows (except Windows 7) targets), run-time detection checks whether `cmpxchg16b` is available. If `cmpxchg16b` is not available at either compile-time or run-time detection, the fallback implementation is used. See also [`portable_atomic_no_outline_atomics`](#optional-cfg-no-outline-atomics) cfg.

Expand Down Expand Up @@ -170,7 +170,7 @@ RUSTFLAGS="--cfg portable_atomic_no_outline_atomics" cargo ...
If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE/FEAT_LSE2 (AArch64).

Note:
- Dynamic detection is currently only enabled in Rust 1.59+ for x86_64 and AArch64, Rust 1.82+ for RISC-V (disabled by default), nightly only for powerpc64 and Arm64EC, otherwise it works the same as when this cfg is set.
- Dynamic detection is currently only enabled in Rust 1.59+ for x86_64, AArch64, and RISC-V (disabled by default on RISC-V), nightly only for powerpc64 and Arm64EC, otherwise it works the same as when this cfg is set.
- If the required target features are enabled at compile-time, the atomic operations are inlined.
- This is compatible with no-std (as with all features except `std`).
- On some targets, run-time detection is disabled by default mainly for incomplete build environments, and can be enabled by `--cfg portable_atomic_outline_atomics`. (When both cfg are enabled, `*_no_*` cfg is preferred.)
Expand Down
17 changes: 7 additions & 10 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ fn main() {
// Custom cfgs set by build script. Not public API.
// grep -F 'cargo:rustc-cfg=' build.rs | grep -Ev '^ *//' | sed -E 's/^.*cargo:rustc-cfg=//; s/(=\\)?".*$//' | LC_ALL=C sort -u | tr '\n' ',' | sed -E 's/,$/\n/'
println!(
"cargo:rustc-check-cfg=cfg(portable_atomic_disable_fiq,portable_atomic_force_amo,portable_atomic_ll_sc_rmw,portable_atomic_new_atomic_intrinsics,portable_atomic_no_asm,portable_atomic_no_asm_maybe_uninit,portable_atomic_no_atomic_64,portable_atomic_no_atomic_cas,portable_atomic_no_atomic_load_store,portable_atomic_no_atomic_min_max,portable_atomic_no_cfg_target_has_atomic,portable_atomic_no_cmpxchg16b_intrinsic,portable_atomic_no_cmpxchg16b_target_feature,portable_atomic_no_const_mut_refs,portable_atomic_no_offset_of,portable_atomic_no_const_raw_ptr_deref,portable_atomic_no_const_transmute,portable_atomic_no_core_unwind_safe,portable_atomic_no_diagnostic_namespace,portable_atomic_no_stronger_failure_ordering,portable_atomic_no_track_caller,portable_atomic_no_unsafe_op_in_unsafe_fn,portable_atomic_pre_llvm_15,portable_atomic_pre_llvm_16,portable_atomic_pre_llvm_18,portable_atomic_pre_llvm_19,portable_atomic_s_mode,portable_atomic_sanitize_thread,portable_atomic_target_feature,portable_atomic_unsafe_assume_single_core,portable_atomic_unstable_asm,portable_atomic_unstable_asm_experimental_arch,portable_atomic_unstable_cfg_target_has_atomic,portable_atomic_unstable_isa_attribute)"
"cargo:rustc-check-cfg=cfg(portable_atomic_disable_fiq,portable_atomic_force_amo,portable_atomic_ll_sc_rmw,portable_atomic_new_atomic_intrinsics,portable_atomic_no_asm,portable_atomic_no_asm_maybe_uninit,portable_atomic_no_atomic_64,portable_atomic_no_atomic_cas,portable_atomic_no_atomic_load_store,portable_atomic_no_atomic_min_max,portable_atomic_no_cfg_target_has_atomic,portable_atomic_no_cmpxchg16b_intrinsic,portable_atomic_no_cmpxchg16b_target_feature,portable_atomic_no_const_mut_refs,portable_atomic_no_offset_of,portable_atomic_no_const_raw_ptr_deref,portable_atomic_no_const_transmute,portable_atomic_no_core_unwind_safe,portable_atomic_no_diagnostic_namespace,portable_atomic_no_stronger_failure_ordering,portable_atomic_no_track_caller,portable_atomic_no_unsafe_op_in_unsafe_fn,portable_atomic_pre_llvm_15,portable_atomic_pre_llvm_16,portable_atomic_pre_llvm_18,portable_atomic_s_mode,portable_atomic_sanitize_thread,portable_atomic_target_feature,portable_atomic_unsafe_assume_single_core,portable_atomic_unstable_asm,portable_atomic_unstable_asm_experimental_arch,portable_atomic_unstable_cfg_target_has_atomic,portable_atomic_unstable_isa_attribute)"
);
// TODO: handle multi-line target_feature_fallback
// grep -F 'target_feature_fallback("' build.rs | grep -Ev '^ *//' | sed -E 's/^.*target_feature_fallback\(//; s/",.*$/"/' | LC_ALL=C sort -u | tr '\n' ',' | sed -E 's/,$/\n/'
Expand Down Expand Up @@ -175,15 +175,12 @@ fn main() {
println!("cargo:rustc-cfg=portable_atomic_no_atomic_load_store");
}

if version.llvm < 19 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_19");
if version.llvm < 18 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_18");
if version.llvm < 16 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_16");
if version.llvm < 15 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_15");
}
if version.llvm < 18 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_18");
if version.llvm < 16 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_16");
if version.llvm < 15 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_15");
}
}
}
Expand Down
4 changes: 0 additions & 4 deletions src/cfgs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@ mod atomic_32_macros {
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
not(portable_atomic_no_asm),
not(portable_atomic_pre_llvm_19),
any(
target_feature = "experimental-zacas",
portable_atomic_target_feature = "experimental-zacas",
Expand Down Expand Up @@ -221,7 +220,6 @@ mod atomic_64_macros {
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
not(portable_atomic_no_asm),
not(portable_atomic_pre_llvm_19),
any(
target_feature = "experimental-zacas",
portable_atomic_target_feature = "experimental-zacas",
Expand Down Expand Up @@ -277,7 +275,6 @@ mod atomic_64_macros {
all(
target_arch = "riscv64",
not(portable_atomic_no_asm),
not(portable_atomic_pre_llvm_19),
any(
target_feature = "experimental-zacas",
portable_atomic_target_feature = "experimental-zacas",
Expand Down Expand Up @@ -386,7 +383,6 @@ mod atomic_128_macros {
all(
target_arch = "riscv64",
not(portable_atomic_no_asm),
not(portable_atomic_pre_llvm_19),
any(
target_feature = "experimental-zacas",
portable_atomic_target_feature = "experimental-zacas",
Expand Down
2 changes: 1 addition & 1 deletion src/imp/atomic128/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Here is the table of targets that support 128-bit atomics and the instructions u
| ----------- | ---- | ----- | --- | --- | ---- |
| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel, AMD, or Zhaoxin CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. <br> Requires rustc 1.59+ |
| aarch64/arm64ec | ldxp/stxp or casp or ldp/ldiapp | ldxp/stxp or casp or stp/stilp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, ldiapp/stilp requires lse2 and rcpc3 target features, swpp/ldclrp/ldsetp requires lse128 target feature. <br> Both compile-time and run-time detection are supported. <br> Requires rustc 1.59+ (aarch64) / nightly (arm64ec) |
| riscv64 | amocas.q | amocas.q | amocas.q | amocas.q | Experimental because LLVM marking the corresponding target feature as experimental. Requires experimental-zacas target feature. Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires rustc 1.82+ (LLVM 19+) |
| riscv64 | amocas.q | amocas.q | amocas.q | amocas.q | Experimental because LLVM marks the corresponding target feature as experimental. Requires experimental-zacas target feature. Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires rustc 1.59+ |
| powerpc64 | lq | stq | lqarx/stqcx. | lqarx/stqcx. | Requires target-cpu pwr8+ (powerpc64le is pwr8 by default). Both compile-time and run-time detection are supported. <br> Requires nightly |
| s390x | lpq | stpq | cdsg | cdsg | Requires nightly |

Expand Down
1 change: 0 additions & 1 deletion src/imp/atomic128/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ pub(super) mod powerpc64;
#[cfg(all(
target_arch = "riscv64",
not(portable_atomic_no_asm),
not(portable_atomic_pre_llvm_19),
any(
target_feature = "experimental-zacas",
portable_atomic_target_feature = "experimental-zacas",
Expand Down
14 changes: 7 additions & 7 deletions src/imp/atomic128/riscv64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ macro_rules! debug_assert_zacas {
}

// LLVM doesn't support `.option arch, +zabha` directive as of LLVM 19 because it is experimental.
// So, we currently always using .insn directive.
// `.insn <value>` directive requires LLVM 19.
// So, we currently always use the .4byte directive.
// Note that `.insn <value>` directive requires LLVM 19.
// https://github.com/llvm/llvm-project/commit/2a086dce691e3cc34a2fc27f4fb255bb2cbbfac9
// // https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc#arch
// macro_rules! start_zacas {
Expand All @@ -73,7 +73,7 @@ macro_rules! debug_assert_zacas {
// }

// LLVM doesn't support `.option arch, +zabha` directive as of LLVM 19 because it is experimental.
// So, we currently always using .insn directive.
// So, we currently always use the .4byte directive.
// macro_rules! atomic_rmw_amocas_order {
// ($op:ident, $order:ident) => {
// atomic_rmw_amocas_order!($op, $order, failure = $order)
Expand Down Expand Up @@ -173,7 +173,7 @@ unsafe fn atomic_load_zacas(src: *mut u128, order: Ordering) -> u128 {
unsafe {
let (out_lo, out_hi);
// LLVM doesn't support `.option arch, +zabha` directive as of LLVM 19 because it is experimental.
// So, we currently always using .insn directive.
// So, we currently always use the .4byte directive.
// macro_rules! load {
// ($fence:tt, $asm_order:tt) => {
// asm!(
Expand All @@ -194,7 +194,7 @@ unsafe fn atomic_load_zacas(src: *mut u128, order: Ordering) -> u128 {
asm!(
$fence,
// 4: 2{8,c,a,e}c5462f amocas.q{,.aq,.rl,.aqrl} a2, a2, (a0)
concat!(".insn 0x2", $insn_order, "c5462f"),
concat!(".4byte 0x2", $insn_order, "c5462f"),
in("a0") ptr_reg!(src),
inout("a2") 0_u64 => out_lo,
inout("a3") 0_u64 => out_hi,
Expand Down Expand Up @@ -324,7 +324,7 @@ unsafe fn atomic_compare_exchange_zacas(
let new = U128 { whole: new };
let (prev_lo, prev_hi);
// LLVM doesn't support `.option arch, +zabha` directive as of LLVM 19 because it is experimental.
// So, we currently always using .insn directive.
// So, we currently always use the .4byte directive.
// macro_rules! cmpxchg {
// ($fence:tt, $asm_order:tt) => {
// asm!(
Expand All @@ -349,7 +349,7 @@ unsafe fn atomic_compare_exchange_zacas(
asm!(
$fence,
// c: 2{8,c,a,e}c5472f amocas.q{,.aq,.rl,.aqrl} a4, a2, (a0)
concat!(".insn 0x2", $insn_order, "c5472f"),
concat!(".4byte 0x2", $insn_order, "c5472f"),
in("a0") ptr_reg!(dst),
// must be allocated to even/odd register pair
inout("a4") old.pair.lo => prev_lo,
Expand Down
2 changes: 1 addition & 1 deletion src/imp/atomic64/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Here is the table of targets that support 64-bit atomics and the instructions us
| ----------- | ---- | ----- | --- | --- | ---- |
| x86 | cmpxchg8b or fild or movlps or movq | cmpxchg8b or fistp or movlps | cmpxchg8b | cmpxchg8b | provided by `core::sync::atomic` |
| arm | ldrexd | ldrexd/strexd | ldrexd/strexd | ldrexd/strexd | provided by `core::sync::atomic` for Armv6+, otherwise provided by us for Linux/Android using kuser_cmpxchg64 (see [arm_linux.rs](arm_linux.rs) for more) |
| riscv32 | amocas.d | amocas.d | amocas.d | amocas.d | Experimental because LLVM marking the corresponding target feature as experimental. Requires experimental-zacas target feature. Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires rustc 1.82+ (LLVM 19+) |
| riscv32 | amocas.d | amocas.d | amocas.d | amocas.d | Experimental because LLVM marks the corresponding target feature as experimental. Requires experimental-zacas target feature. Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires rustc 1.59+ |

If `core::sync::atomic` provides 64-bit atomics, we use them.
On compiler versions or platforms where these are not supported, the fallback implementation is used.
Expand Down
1 change: 0 additions & 1 deletion src/imp/atomic64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ pub(super) mod arm_linux;
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
not(portable_atomic_no_asm),
not(portable_atomic_pre_llvm_19),
any(
target_feature = "experimental-zacas",
portable_atomic_target_feature = "experimental-zacas",
Expand Down
14 changes: 7 additions & 7 deletions src/imp/atomic64/riscv32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ macro_rules! debug_assert_zacas {
}

// LLVM doesn't support `.option arch, +zabha` directive as of LLVM 19 because it is experimental.
// So, we currently always using .insn directive.
// `.insn <value>` directive requires LLVM 19.
// So, we currently always use the .4byte directive.
// Note that `.insn <value>` directive requires LLVM 19.
// https://github.com/llvm/llvm-project/commit/2a086dce691e3cc34a2fc27f4fb255bb2cbbfac9
// // https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc#arch
// macro_rules! start_zacas {
Expand All @@ -75,7 +75,7 @@ macro_rules! debug_assert_zacas {
// }

// LLVM doesn't support `.option arch, +zabha` directive as of LLVM 19 because it is experimental.
// So, we currently always using .insn directive.
// So, we currently always use the .4byte directive.
// macro_rules! atomic_rmw_amocas_order {
// ($op:ident, $order:ident) => {
// atomic_rmw_amocas_order!($op, $order, failure = $order)
Expand Down Expand Up @@ -175,7 +175,7 @@ unsafe fn atomic_load_zacas(src: *mut u64, order: Ordering) -> u64 {
unsafe {
let (out_lo, out_hi);
// LLVM doesn't support `.option arch, +zabha` directive as of LLVM 19 because it is experimental.
// So, we currently always using .insn directive.
// So, we currently always use the .4byte directive.
// macro_rules! load {
// ($fence:tt, $asm_order:tt) => {
// asm!(
Expand All @@ -196,7 +196,7 @@ unsafe fn atomic_load_zacas(src: *mut u64, order: Ordering) -> u64 {
asm!(
$fence,
// 4: 2{8,c,a,e}c5362f amocas.d{,.aq,.rl,.aqrl} a2, a2, (a0)
concat!(".insn 0x2", $insn_order, "c5362f"),
concat!(".4byte 0x2", $insn_order, "c5362f"),
in("a0") ptr_reg!(src),
inout("a2") 0_u32 => out_lo,
inout("a3") 0_u32 => out_hi,
Expand Down Expand Up @@ -326,7 +326,7 @@ unsafe fn atomic_compare_exchange_zacas(
let new = U64 { whole: new };
let (prev_lo, prev_hi);
// LLVM doesn't support `.option arch, +zabha` directive as of LLVM 19 because it is experimental.
// So, we currently always using .insn directive.
// So, we currently always use the .4byte directive.
// macro_rules! cmpxchg {
// ($fence:tt, $asm_order:tt) => {
// asm!(
Expand All @@ -351,7 +351,7 @@ unsafe fn atomic_compare_exchange_zacas(
asm!(
$fence,
// 10: 2{8,c,a,e}c5372f amocas.d{,.aq,.rl,.aqrl} a4, a2, (a0)
concat!(".insn 0x2", $insn_order, "c5372f"),
concat!(".4byte 0x2", $insn_order, "c5372f"),
in("a0") ptr_reg!(dst),
// must be allocated to even/odd register pair
inout("a4") old.pair.lo => prev_lo,
Expand Down
2 changes: 0 additions & 2 deletions src/imp/fallback/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ type and the value type must be the same.
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
not(portable_atomic_no_asm),
not(portable_atomic_pre_llvm_19),
any(
target_feature = "experimental-zacas",
portable_atomic_target_feature = "experimental-zacas",
Expand All @@ -64,7 +63,6 @@ type and the value type must be the same.
all(
target_arch = "riscv64",
not(portable_atomic_no_asm),
not(portable_atomic_pre_llvm_19),
any(
target_feature = "experimental-zacas",
portable_atomic_target_feature = "experimental-zacas",
Expand Down
Loading

0 comments on commit be4707d

Please sign in to comment.