From ae5e275cf68e836b8a8fd06fb24f620b681635f6 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Sun, 22 Sep 2024 00:53:12 +0900 Subject: [PATCH] x86_64,powerpc64: Improve detect helper macro --- src/imp/atomic128/powerpc64.rs | 93 ++++++++++++++++------------------ src/imp/atomic128/x86_64.rs | 66 ++++++++++++------------ 2 files changed, 76 insertions(+), 83 deletions(-) diff --git a/src/imp/atomic128/powerpc64.rs b/src/imp/atomic128/powerpc64.rs index fb3c9f0b..0e4d3534 100644 --- a/src/imp/atomic128/powerpc64.rs +++ b/src/imp/atomic128/powerpc64.rs @@ -128,11 +128,13 @@ fn extract_cr0(r: u64) -> bool { r & 0x20000000 != 0 } +// If quadword-atomics is available at compile-time, we can always use pwr8_fn. #[cfg(any( target_feature = "quadword-atomics", portable_atomic_target_feature = "quadword-atomics", ))] use atomic_load_pwr8 as atomic_load; +// Otherwise, we need to do run-time detection and can use pwr8_fn only if quadword-atomics is available. #[cfg(not(any( target_feature = "quadword-atomics", portable_atomic_target_feature = "quadword-atomics", @@ -238,11 +240,13 @@ unsafe fn atomic_load_pwr8(src: *mut u128, order: Ordering) -> u128 { } } +// If quadword-atomics is available at compile-time, we can always use pwr8_fn. #[cfg(any( target_feature = "quadword-atomics", portable_atomic_target_feature = "quadword-atomics", ))] use atomic_store_pwr8 as atomic_store; +// Otherwise, we need to do run-time detection and can use pwr8_fn only if quadword-atomics is available. #[cfg(not(any( target_feature = "quadword-atomics", portable_atomic_target_feature = "quadword-atomics", @@ -502,11 +506,6 @@ unsafe fn atomic_compare_exchange_weak_pwr8( } } -#[cfg(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", -))] -use atomic_swap_pwr8 as atomic_swap; // Do not use atomic_rmw_ll_sc_3 because it needs extra MR to implement swap. #[inline] unsafe fn atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128 { @@ -553,12 +552,7 @@ unsafe fn atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128 { /// - r6/r7 pair: previous value loaded by ll (read-only for `$op`) /// - r8/r9 pair: new value that will be stored by sc macro_rules! atomic_rmw_ll_sc_3 { - ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => { - #[cfg(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", - ))] - use $name as $reexport_name; + ($name:ident, [$($reg:tt)*], $($op:tt)*) => { #[inline] unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); @@ -607,12 +601,7 @@ macro_rules! atomic_rmw_ll_sc_3 { /// - r6/r7 pair: previous value loaded by ll (read-only for `$op`) /// - r8/r9 pair: new value that will be stored by sc macro_rules! atomic_rmw_ll_sc_2 { - ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => { - #[cfg(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", - ))] - use $name as $reexport_name; + ($name:ident, [$($reg:tt)*], $($op:tt)*) => { #[inline] unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); @@ -653,37 +642,37 @@ macro_rules! atomic_rmw_ll_sc_2 { } atomic_rmw_ll_sc_3! { - atomic_add_pwr8 as atomic_add, [out("xer") _,], + atomic_add_pwr8, [out("xer") _,], "addc %r9, {val_lo}, %r7", "adde %r8, {val_hi}, %r6", } atomic_rmw_ll_sc_3! 
{ - atomic_sub_pwr8 as atomic_sub, [out("xer") _,], + atomic_sub_pwr8, [out("xer") _,], "subc %r9, %r7, {val_lo}", "subfe %r8, {val_hi}, %r6", } atomic_rmw_ll_sc_3! { - atomic_and_pwr8 as atomic_and, [], + atomic_and_pwr8, [], "and %r9, {val_lo}, %r7", "and %r8, {val_hi}, %r6", } atomic_rmw_ll_sc_3! { - atomic_nand_pwr8 as atomic_nand, [], + atomic_nand_pwr8, [], "nand %r9, {val_lo}, %r7", "nand %r8, {val_hi}, %r6", } atomic_rmw_ll_sc_3! { - atomic_or_pwr8 as atomic_or, [], + atomic_or_pwr8, [], "or %r9, {val_lo}, %r7", "or %r8, {val_hi}, %r6", } atomic_rmw_ll_sc_3! { - atomic_xor_pwr8 as atomic_xor, [], + atomic_xor_pwr8, [], "xor %r9, {val_lo}, %r7", "xor %r8, {val_hi}, %r6", } atomic_rmw_ll_sc_3! { - atomic_max_pwr8 as atomic_max, [out("cr1") _,], + atomic_max_pwr8, [out("cr1") _,], "cmpld %r7, {val_lo}", // (unsigned) compare lo 64-bit, store result to cr0 "iselgt %r9, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0 "cmpd %cr1, %r6, {val_hi}", // (signed) compare hi 64-bit, store result to cr1 @@ -693,7 +682,7 @@ atomic_rmw_ll_sc_3! { "isel %r8, %r6, {val_hi}, 5", // select hi 64-bit based on GT bit in cr1 } atomic_rmw_ll_sc_3! { - atomic_umax_pwr8 as atomic_umax, [], + atomic_umax_pwr8, [], "cmpld %r7, {val_lo}", // compare lo 64-bit, store result to cr0 "iselgt %r9, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0 "cmpld %r6, {val_hi}", // compare hi 64-bit, store result to cr0 @@ -702,7 +691,7 @@ atomic_rmw_ll_sc_3! { "iselgt %r8, %r6, {val_hi}", // select hi 64-bit based on GT bit in cr0 } atomic_rmw_ll_sc_3! { - atomic_min_pwr8 as atomic_min, [out("cr1") _,], + atomic_min_pwr8, [out("cr1") _,], "cmpld %r7, {val_lo}", // (unsigned) compare lo 64-bit, store result to cr0 "isellt %r9, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0 "cmpd %cr1, %r6, {val_hi}", // (signed) compare hi 64-bit, store result to cr1 @@ -712,7 +701,7 @@ atomic_rmw_ll_sc_3! { "isel %r8, %r6, {val_hi}, 4", // select hi 64-bit based on LT bit in cr1 } atomic_rmw_ll_sc_3! { - atomic_umin_pwr8 as atomic_umin, [], + atomic_umin_pwr8, [], "cmpld %r7, {val_lo}", // compare lo 64-bit, store result to cr0 "isellt %r9, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0 "cmpld %r6, {val_hi}", // compare hi 64-bit, store result to cr0 @@ -721,11 +710,6 @@ atomic_rmw_ll_sc_3! { "isellt %r8, %r6, {val_hi}", // select hi 64-bit based on LT bit in cr0 } -#[cfg(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", -))] -use atomic_not_pwr8 as atomic_not; #[inline] unsafe fn atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. @@ -734,25 +718,32 @@ unsafe fn atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128 { #[cfg(not(portable_atomic_no_llvm_16))] atomic_rmw_ll_sc_2! { - atomic_neg_pwr8 as atomic_neg, [out("xer") _,], + atomic_neg_pwr8, [out("xer") _,], "subfic %r9, %r7, 0", "subfze %r8, %r6", } // LLVM 15 miscompiles subfic. #[cfg(portable_atomic_no_llvm_16)] atomic_rmw_ll_sc_2! { - atomic_neg_pwr8 as atomic_neg, [zero = in(reg) 0_u64, out("xer") _,], + atomic_neg_pwr8, [zero = in(reg) 0_u64, out("xer") _,], "subc %r9, {zero}, %r7", "subfze %r8, %r6", } -macro_rules! atomic_rmw_with_ifunc { +macro_rules! 
select_atomic_rmw { ( unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?; pwr8 = $pwr8_fn:ident; non_seqcst_fallback = $non_seqcst_fallback_fn:ident; seqcst_fallback = $seqcst_fallback_fn:ident; ) => { + // If quadword-atomics is available at compile-time, we can always use pwr8_fn. + #[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + ))] + use $pwr8_fn as $name; + // Otherwise, we need to do run-time detection and can use pwr8_fn only if quadword-atomics is available. #[cfg(not(any( target_feature = "quadword-atomics", portable_atomic_target_feature = "quadword-atomics", @@ -826,85 +817,89 @@ macro_rules! atomic_rmw_with_ifunc { }; } -atomic_rmw_with_ifunc! { +#[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +)))] +select_atomic_rmw! { unsafe fn atomic_compare_exchange_ifunc(dst: *mut u128, old: u128, new: u128) -> (u128, bool); pwr8 = atomic_compare_exchange_pwr8; non_seqcst_fallback = atomic_compare_exchange_non_seqcst; seqcst_fallback = atomic_compare_exchange_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128; pwr8 = atomic_swap_pwr8; non_seqcst_fallback = atomic_swap_non_seqcst; seqcst_fallback = atomic_swap_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128; pwr8 = atomic_add_pwr8; non_seqcst_fallback = atomic_add_non_seqcst; seqcst_fallback = atomic_add_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128; pwr8 = atomic_sub_pwr8; non_seqcst_fallback = atomic_sub_non_seqcst; seqcst_fallback = atomic_sub_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128; pwr8 = atomic_and_pwr8; non_seqcst_fallback = atomic_and_non_seqcst; seqcst_fallback = atomic_and_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128; pwr8 = atomic_nand_pwr8; non_seqcst_fallback = atomic_nand_non_seqcst; seqcst_fallback = atomic_nand_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128; pwr8 = atomic_or_pwr8; non_seqcst_fallback = atomic_or_non_seqcst; seqcst_fallback = atomic_or_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128; pwr8 = atomic_xor_pwr8; non_seqcst_fallback = atomic_xor_non_seqcst; seqcst_fallback = atomic_xor_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128; pwr8 = atomic_max_pwr8; non_seqcst_fallback = atomic_max_non_seqcst; seqcst_fallback = atomic_max_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128; pwr8 = atomic_umax_pwr8; non_seqcst_fallback = atomic_umax_non_seqcst; seqcst_fallback = atomic_umax_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128; pwr8 = atomic_min_pwr8; non_seqcst_fallback = atomic_min_non_seqcst; seqcst_fallback = atomic_min_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128; pwr8 = atomic_umin_pwr8; non_seqcst_fallback = atomic_umin_non_seqcst; seqcst_fallback = atomic_umin_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! 
{ unsafe fn atomic_not(dst: *mut u128) -> u128; pwr8 = atomic_not_pwr8; non_seqcst_fallback = atomic_not_non_seqcst; seqcst_fallback = atomic_not_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_neg(dst: *mut u128) -> u128; pwr8 = atomic_neg_pwr8; non_seqcst_fallback = atomic_neg_non_seqcst; diff --git a/src/imp/atomic128/x86_64.rs b/src/imp/atomic128/x86_64.rs index 3e86d658..5116d40a 100644 --- a/src/imp/atomic128/x86_64.rs +++ b/src/imp/atomic128/x86_64.rs @@ -440,8 +440,6 @@ unsafe fn atomic_compare_exchange( // cmpxchg16b is always strong. use atomic_compare_exchange as atomic_compare_exchange_weak; -#[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] -use atomic_swap_cmpxchg16b as atomic_swap; // See cmpxchg16b() for target_feature(enable). #[cfg_attr( not(portable_atomic_no_cmpxchg16b_target_feature), @@ -513,9 +511,7 @@ unsafe fn atomic_swap_cmpxchg16b(dst: *mut u128, val: u128, _order: Ordering) -> // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows // omitting the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx. macro_rules! atomic_rmw_cas_3 { - ($name:ident as $reexport_name:ident, $($op:tt)*) => { - #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] - use $name as $reexport_name; + ($name:ident, $($op:tt)*) => { // See cmpxchg16b() for target_feature(enable). #[cfg_attr( not(portable_atomic_no_cmpxchg16b_target_feature), @@ -584,9 +580,7 @@ macro_rules! atomic_rmw_cas_3 { // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows // omitting the storing of condition flags and avoid use of xchg to handle rbx. macro_rules! atomic_rmw_cas_2 { - ($name:ident as $reexport_name:ident, $($op:tt)*) => { - #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] - use $name as $reexport_name; + ($name:ident, $($op:tt)*) => { // See cmpxchg16b() for target_feature(enable). #[cfg_attr( not(portable_atomic_no_cmpxchg16b_target_feature), @@ -645,28 +639,28 @@ macro_rules! atomic_rmw_cas_2 { } atomic_rmw_cas_3! { - atomic_add_cmpxchg16b as atomic_add, + atomic_add_cmpxchg16b, "mov rbx, rax", "add rbx, rsi", "mov rcx, rdx", "adc rcx, r8", } atomic_rmw_cas_3! { - atomic_sub_cmpxchg16b as atomic_sub, + atomic_sub_cmpxchg16b, "mov rbx, rax", "sub rbx, rsi", "mov rcx, rdx", "sbb rcx, r8", } atomic_rmw_cas_3! { - atomic_and_cmpxchg16b as atomic_and, + atomic_and_cmpxchg16b, "mov rbx, rax", "and rbx, rsi", "mov rcx, rdx", "and rcx, r8", } atomic_rmw_cas_3! { - atomic_nand_cmpxchg16b as atomic_nand, + atomic_nand_cmpxchg16b, "mov rbx, rax", "and rbx, rsi", "not rbx", @@ -675,14 +669,14 @@ atomic_rmw_cas_3! { "not rcx", } atomic_rmw_cas_3! { - atomic_or_cmpxchg16b as atomic_or, + atomic_or_cmpxchg16b, "mov rbx, rax", "or rbx, rsi", "mov rcx, rdx", "or rcx, r8", } atomic_rmw_cas_3! { - atomic_xor_cmpxchg16b as atomic_xor, + atomic_xor_cmpxchg16b, "mov rbx, rax", "xor rbx, rsi", "mov rcx, rdx", @@ -690,14 +684,14 @@ atomic_rmw_cas_3! { } atomic_rmw_cas_2! { - atomic_not_cmpxchg16b as atomic_not, + atomic_not_cmpxchg16b, "mov rbx, rax", "not rbx", "mov rcx, rdx", "not rcx", } atomic_rmw_cas_2! { - atomic_neg_cmpxchg16b as atomic_neg, + atomic_neg_cmpxchg16b, "mov rbx, rax", "neg rbx", "mov rcx, 0", @@ -705,7 +699,7 @@ atomic_rmw_cas_2! { } atomic_rmw_cas_3! 
{ - atomic_max_cmpxchg16b as atomic_max, + atomic_max_cmpxchg16b, "cmp rsi, rax", "mov rcx, r8", "sbb rcx, rdx", @@ -715,7 +709,7 @@ atomic_rmw_cas_3! { "cmovl rbx, rax", } atomic_rmw_cas_3! { - atomic_umax_cmpxchg16b as atomic_umax, + atomic_umax_cmpxchg16b, "cmp rsi, rax", "mov rcx, r8", "sbb rcx, rdx", @@ -725,7 +719,7 @@ atomic_rmw_cas_3! { "cmovb rbx, rax", } atomic_rmw_cas_3! { - atomic_min_cmpxchg16b as atomic_min, + atomic_min_cmpxchg16b, "cmp rsi, rax", "mov rcx, r8", "sbb rcx, rdx", @@ -735,7 +729,7 @@ atomic_rmw_cas_3! { "cmovge rbx, rax", } atomic_rmw_cas_3! { - atomic_umin_cmpxchg16b as atomic_umin, + atomic_umin_cmpxchg16b, "cmp rsi, rax", "mov rcx, r8", "sbb rcx, rdx", @@ -745,12 +739,16 @@ atomic_rmw_cas_3! { "cmovae rbx, rax", } -macro_rules! atomic_rmw_with_ifunc { +macro_rules! select_atomic_rmw { ( unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?; cmpxchg16b = $cmpxchg16b_fn:ident; fallback = $seqcst_fallback_fn:ident; ) => { + // If cmpxchg16b is available at compile-time, we can always use cmpxchg16b_fn. + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + use $cmpxchg16b_fn as $name; + // Otherwise, we need to do run-time detection and can use cmpxchg16b_fn only if cmpxchg16b is available. #[cfg(not(any( target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b", @@ -783,67 +781,67 @@ macro_rules! atomic_rmw_with_ifunc { }; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128; cmpxchg16b = atomic_swap_cmpxchg16b; fallback = atomic_swap_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128; cmpxchg16b = atomic_add_cmpxchg16b; fallback = atomic_add_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128; cmpxchg16b = atomic_sub_cmpxchg16b; fallback = atomic_sub_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128; cmpxchg16b = atomic_and_cmpxchg16b; fallback = atomic_and_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128; cmpxchg16b = atomic_nand_cmpxchg16b; fallback = atomic_nand_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128; cmpxchg16b = atomic_or_cmpxchg16b; fallback = atomic_or_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128; cmpxchg16b = atomic_xor_cmpxchg16b; fallback = atomic_xor_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128; cmpxchg16b = atomic_max_cmpxchg16b; fallback = atomic_max_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128; cmpxchg16b = atomic_umax_cmpxchg16b; fallback = atomic_umax_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128; cmpxchg16b = atomic_min_cmpxchg16b; fallback = atomic_min_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128; cmpxchg16b = atomic_umin_cmpxchg16b; fallback = atomic_umin_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! { unsafe fn atomic_not(dst: *mut u128) -> u128; cmpxchg16b = atomic_not_cmpxchg16b; fallback = atomic_not_seqcst; } -atomic_rmw_with_ifunc! { +select_atomic_rmw! 
{ unsafe fn atomic_neg(dst: *mut u128) -> u128; cmpxchg16b = atomic_neg_cmpxchg16b; fallback = atomic_neg_seqcst; }
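
Note on the selection pattern: after this change, both the compile-time branch (`use $pwr8_fn as $name;` / `use $cmpxchg16b_fn as $name;`) and the run-time branch live inside the single `select_atomic_rmw` macro, which is what lets the per-operation cfg'd `use` re-exports above be deleted. On the run-time branch, the macro expands to the crate's ifunc-style dispatch: the first call runs feature detection, caches the selected function pointer in an atomic, and later calls jump straight through the cached pointer. Below is a minimal self-contained sketch of that shape; the detector and the two (non-atomic) implementation bodies are placeholders standing in for the real detection code, asm, and fallback, not the crate's actual items.

use std::sync::atomic::{AtomicPtr, Ordering};

type FnTy = unsafe fn(*mut u128, u128) -> u128;

// Placeholder: stands in for run-time CPU feature detection
// (CPUID on x86_64, OS-provided HWCAP on powerpc64).
fn has_fast_path() -> bool {
    true
}

// Placeholders: stand in for the cmpxchg16b/pwr8 asm and the seqcst fallback.
unsafe fn atomic_add_fast(dst: *mut u128, val: u128) -> u128 {
    let prev = unsafe { *dst };
    unsafe { *dst = prev.wrapping_add(val) };
    prev
}
unsafe fn atomic_add_fallback(dst: *mut u128, val: u128) -> u128 {
    unsafe { atomic_add_fast(dst, val) }
}

// Roughly what the run-time-detection path expands to.
unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128 {
    // Starts out pointing at `detect`, so detection runs on the first call;
    // `detect` then overwrites the pointer with the selected implementation.
    static FUNC: AtomicPtr<()> = AtomicPtr::new(detect as *mut ());
    unsafe fn detect(dst: *mut u128, val: u128) -> u128 {
        let f: FnTy = if has_fast_path() { atomic_add_fast } else { atomic_add_fallback };
        FUNC.store(f as *mut (), Ordering::Relaxed);
        unsafe { f(dst, val) }
    }
    // SAFETY: FUNC only ever holds `detect` or one of the two impls above.
    let f: FnTy = unsafe { std::mem::transmute(FUNC.load(Ordering::Relaxed)) };
    unsafe { f(dst, val) }
}

fn main() {
    let mut x: u128 = 1;
    // SAFETY: `x` is a valid, exclusively owned location (the real asm would
    // additionally require 16-byte alignment).
    assert_eq!(unsafe { atomic_add(&mut x, 2) }, 1);
    assert_eq!(x, 3);
}

Relaxed orderings on FUNC are sufficient here because every value it can hold is a valid function to call; the worst case is a thread redoing detection once.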
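On the powerpc64 side, the `atomic_rmw_ll_sc_3`/`atomic_rmw_ll_sc_2` macros (which now take just `$name` instead of `$name as $reexport_name`) wrap each `$op` in an lqarx/stqcx. reservation loop: load-reserve the 128-bit pair, compute the new value, store-conditional, and branch back if the reservation was lost. In portable terms that is a `fetch_update` loop, where a spurious `compare_exchange_weak` failure corresponds to a failed store-conditional. A small sketch at u64 width; the operation and width are illustrative, not the patch's asm.

use std::sync::atomic::{AtomicU64, Ordering};

// fetch_update retries its closure until the underlying store-conditional
// (or compare-exchange) succeeds, mirroring the lqarx/.../stqcx./bne- loop
// emitted by atomic_rmw_ll_sc_3.
fn atomic_nand(a: &AtomicU64, val: u64) -> u64 {
    a.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |prev| Some(!(prev & val)))
        .unwrap() // never Err: the closure always returns Some
}

fn main() {
    let a = AtomicU64::new(0b1100);
    let prev = atomic_nand(&a, 0b1010); // returns the old value, like the asm
    assert_eq!(prev, 0b1100);
    assert_eq!(a.load(Ordering::Relaxed), !(0b1100 & 0b1010));
}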
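On the x86_64 side, the comment above `atomic_rmw_cas_3` notes that a plain CAS loop over `atomic_compare_exchange` would also work, and that the hand-written asm exists only to avoid storing condition flags and shuffling rbx. For reference, this is the loop shape the asm optimizes, again sketched at u64 width with safe APIs.

use std::sync::atomic::{AtomicU64, Ordering};

// Load the current value, compute the new value, and retry the
// compare-exchange until no other thread races in between; the Ok value is
// the previous value, matching the fetch_* convention. On x86_64,
// compare_exchange_weak is as strong as compare_exchange (cmpxchg never
// fails spuriously), which is also why the patch keeps
// `use atomic_compare_exchange as atomic_compare_exchange_weak;`.
fn atomic_add_via_cas(a: &AtomicU64, val: u64) -> u64 {
    let mut prev = a.load(Ordering::Relaxed);
    loop {
        let next = prev.wrapping_add(val);
        match a.compare_exchange_weak(prev, next, Ordering::SeqCst, Ordering::Relaxed) {
            Ok(p) => return p,
            Err(p) => prev = p, // lost the race; retry with the observed value
        }
    }
}

fn main() {
    let a = AtomicU64::new(1);
    assert_eq!(atomic_add_via_cas(&a, 2), 1);
    assert_eq!(a.load(Ordering::Relaxed), 3);
}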