From 9dcaa15a8dcb0089487d68230551ef2bd84d2bd7 Mon Sep 17 00:00:00 2001 From: joboet Date: Tue, 25 Jun 2024 18:30:49 +0200 Subject: [PATCH] std: separate TLS key creation from TLS access Currently, `std` performs an atomic load to get the OS key on every access to `StaticKey` even when the key is already known. This PR thus replaces `StaticKey` with the platform-specific `get` and `set` function and a new `LazyKey` type that acts as a `LazyLock`, allowing the reuse of the retreived key for multiple accesses. --- std/src/sys/thread_local/guard/key.rs | 6 +-- std/src/sys/thread_local/key/racy.rs | 56 ++++--------------------- std/src/sys/thread_local/key/tests.rs | 39 ++++++++++------- std/src/sys/thread_local/key/unix.rs | 1 + std/src/sys/thread_local/key/windows.rs | 56 ++++++++++++------------- std/src/sys/thread_local/key/xous.rs | 2 +- std/src/sys/thread_local/mod.rs | 21 ++++++---- std/src/sys/thread_local/os.rs | 44 +++++++++---------- 8 files changed, 100 insertions(+), 125 deletions(-) diff --git a/std/src/sys/thread_local/guard/key.rs b/std/src/sys/thread_local/guard/key.rs index ee9d55ddd5e8e..67c3ca8862767 100644 --- a/std/src/sys/thread_local/guard/key.rs +++ b/std/src/sys/thread_local/guard/key.rs @@ -4,15 +4,15 @@ use crate::ptr; use crate::sys::thread_local::destructors; -use crate::sys::thread_local::key::StaticKey; +use crate::sys::thread_local::key::{set, LazyKey}; pub fn enable() { - static DTORS: StaticKey = StaticKey::new(Some(run)); + static DTORS: LazyKey = LazyKey::new(Some(run)); // Setting the key value to something other than NULL will result in the // destructor being run at thread exit. unsafe { - DTORS.set(ptr::without_provenance_mut(1)); + set(DTORS.force(), ptr::without_provenance_mut(1)); } unsafe extern "C" fn run(_: *mut u8) { diff --git a/std/src/sys/thread_local/key/racy.rs b/std/src/sys/thread_local/key/racy.rs index eda8b83bc7f03..69f11458c3289 100644 --- a/std/src/sys/thread_local/key/racy.rs +++ b/std/src/sys/thread_local/key/racy.rs @@ -1,4 +1,4 @@ -//! A `StaticKey` implementation using racy initialization. +//! A `LazyKey` implementation using racy initialization. //! //! Unfortunately, none of the platforms currently supported by `std` allows //! creating TLS keys at compile-time. Thus we need a way to lazily create keys. @@ -10,34 +10,12 @@ use crate::sync::atomic::{self, AtomicUsize, Ordering}; /// A type for TLS keys that are statically allocated. /// -/// This type is entirely `unsafe` to use as it does not protect against -/// use-after-deallocation or use-during-deallocation. -/// -/// The actual OS-TLS key is lazily allocated when this is used for the first -/// time. The key is also deallocated when the Rust runtime exits or `destroy` -/// is called, whichever comes first. -/// -/// # Examples -/// -/// ```ignore (cannot-doctest-private-modules) -/// use tls::os::{StaticKey, INIT}; -/// -/// // Use a regular global static to store the key. -/// static KEY: StaticKey = INIT; -/// -/// // The state provided via `get` and `set` is thread-local. -/// unsafe { -/// assert!(KEY.get().is_null()); -/// KEY.set(1 as *mut u8); -/// } -/// ``` -pub struct StaticKey { +/// This is basically a `LazyLock`, but avoids blocking and circular +/// dependencies with the rest of `std`. +pub struct LazyKey { /// Inner static TLS key (internals). key: AtomicUsize, /// Destructor for the TLS value. - /// - /// See `Key::new` for information about when the destructor runs and how - /// it runs. dtor: Option, } @@ -51,32 +29,14 @@ const KEY_SENTVAL: usize = 0; #[cfg(target_os = "nto")] const KEY_SENTVAL: usize = libc::PTHREAD_KEYS_MAX + 1; -impl StaticKey { +impl LazyKey { #[rustc_const_unstable(feature = "thread_local_internals", issue = "none")] - pub const fn new(dtor: Option) -> StaticKey { - StaticKey { key: atomic::AtomicUsize::new(KEY_SENTVAL), dtor } - } - - /// Gets the value associated with this TLS key - /// - /// This will lazily allocate a TLS key from the OS if one has not already - /// been allocated. - #[inline] - pub unsafe fn get(&self) -> *mut u8 { - unsafe { super::get(self.key()) } - } - - /// Sets this TLS key to a new value. - /// - /// This will lazily allocate a TLS key from the OS if one has not already - /// been allocated. - #[inline] - pub unsafe fn set(&self, val: *mut u8) { - unsafe { super::set(self.key(), val) } + pub const fn new(dtor: Option) -> LazyKey { + LazyKey { key: atomic::AtomicUsize::new(KEY_SENTVAL), dtor } } #[inline] - fn key(&self) -> super::Key { + pub fn force(&self) -> super::Key { match self.key.load(Ordering::Acquire) { KEY_SENTVAL => self.lazy_init() as super::Key, n => n as super::Key, diff --git a/std/src/sys/thread_local/key/tests.rs b/std/src/sys/thread_local/key/tests.rs index 24cad396da269..d82b34e71f0e4 100644 --- a/std/src/sys/thread_local/key/tests.rs +++ b/std/src/sys/thread_local/key/tests.rs @@ -1,18 +1,25 @@ -use super::StaticKey; +use super::{get, set, LazyKey}; use crate::ptr; #[test] fn smoke() { - static K1: StaticKey = StaticKey::new(None); - static K2: StaticKey = StaticKey::new(None); + static K1: LazyKey = LazyKey::new(None); + static K2: LazyKey = LazyKey::new(None); + + let k1 = K1.force(); + let k2 = K2.force(); + assert_ne!(k1, k2); + + assert_eq!(K1.force(), k1); + assert_eq!(K2.force(), k2); unsafe { - assert!(K1.get().is_null()); - assert!(K2.get().is_null()); - K1.set(ptr::without_provenance_mut(1)); - K2.set(ptr::without_provenance_mut(2)); - assert_eq!(K1.get() as usize, 1); - assert_eq!(K2.get() as usize, 2); + assert!(get(k1).is_null()); + assert!(get(k2).is_null()); + set(k1, ptr::without_provenance_mut(1)); + set(k2, ptr::without_provenance_mut(2)); + assert_eq!(get(k1) as usize, 1); + assert_eq!(get(k2) as usize, 2); } } @@ -26,25 +33,27 @@ fn destructors() { drop(unsafe { Arc::from_raw(ptr as *const ()) }); } - static KEY: StaticKey = StaticKey::new(Some(destruct)); + static KEY: LazyKey = LazyKey::new(Some(destruct)); let shared1 = Arc::new(()); let shared2 = Arc::clone(&shared1); + let key = KEY.force(); unsafe { - assert!(KEY.get().is_null()); - KEY.set(Arc::into_raw(shared1) as *mut u8); + assert!(get(key).is_null()); + set(key, Arc::into_raw(shared1) as *mut u8); } thread::spawn(move || unsafe { - assert!(KEY.get().is_null()); - KEY.set(Arc::into_raw(shared2) as *mut u8); + let key = KEY.force(); + assert!(get(key).is_null()); + set(key, Arc::into_raw(shared2) as *mut u8); }) .join() .unwrap(); // Leak the Arc, let the TLS destructor clean it up. - let shared1 = unsafe { ManuallyDrop::new(Arc::from_raw(KEY.get() as *const ())) }; + let shared1 = unsafe { ManuallyDrop::new(Arc::from_raw(get(key) as *const ())) }; assert_eq!( Arc::strong_count(&shared1), 1, diff --git a/std/src/sys/thread_local/key/unix.rs b/std/src/sys/thread_local/key/unix.rs index 13522d44b35dc..28e48a750b9bf 100644 --- a/std/src/sys/thread_local/key/unix.rs +++ b/std/src/sys/thread_local/key/unix.rs @@ -16,6 +16,7 @@ pub unsafe fn set(key: Key, value: *mut u8) { } #[inline] +#[cfg(any(not(target_thread_local), test))] pub unsafe fn get(key: Key) -> *mut u8 { unsafe { libc::pthread_getspecific(key) as *mut u8 } } diff --git a/std/src/sys/thread_local/key/windows.rs b/std/src/sys/thread_local/key/windows.rs index ad0e72c29edaf..baf23979c7c61 100644 --- a/std/src/sys/thread_local/key/windows.rs +++ b/std/src/sys/thread_local/key/windows.rs @@ -1,4 +1,4 @@ -//! Implementation of `StaticKey` for Windows. +//! Implementation of `LazyKey` for Windows. //! //! Windows has no native support for running destructors so we manage our own //! list of destructors to keep track of how to destroy keys. We then install a @@ -13,9 +13,9 @@ //! don't reach a fixed point after a short while then we just inevitably leak //! something. //! -//! The list is implemented as an atomic single-linked list of `StaticKey`s and +//! The list is implemented as an atomic single-linked list of `LazyKey`s and //! does not support unregistration. Unfortunately, this means that we cannot -//! use racy initialization for creating the keys in `StaticKey`, as that could +//! use racy initialization for creating the keys in `LazyKey`, as that could //! result in destructors being missed. Hence, we synchronize the creation of //! keys with destructors through [`INIT_ONCE`](c::INIT_ONCE) (`std`'s //! [`Once`](crate::sync::Once) cannot be used since it might use TLS itself). @@ -33,26 +33,26 @@ use crate::sync::atomic::{ use crate::sys::c; use crate::sys::thread_local::guard; -type Key = c::DWORD; +pub type Key = c::DWORD; type Dtor = unsafe extern "C" fn(*mut u8); -pub struct StaticKey { +pub struct LazyKey { /// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX /// is not a valid key value, this allows us to use zero as sentinel value /// without risking overflow. key: AtomicU32, dtor: Option, - next: AtomicPtr, + next: AtomicPtr, /// Currently, destructors cannot be unregistered, so we cannot use racy /// initialization for keys. Instead, we need synchronize initialization. /// Use the Windows-provided `Once` since it does not require TLS. once: UnsafeCell, } -impl StaticKey { +impl LazyKey { #[inline] - pub const fn new(dtor: Option) -> StaticKey { - StaticKey { + pub const fn new(dtor: Option) -> LazyKey { + LazyKey { key: AtomicU32::new(0), dtor, next: AtomicPtr::new(ptr::null_mut()), @@ -61,18 +61,7 @@ impl StaticKey { } #[inline] - pub unsafe fn set(&'static self, val: *mut u8) { - let r = unsafe { c::TlsSetValue(self.key(), val.cast()) }; - debug_assert_eq!(r, c::TRUE); - } - - #[inline] - pub unsafe fn get(&'static self) -> *mut u8 { - unsafe { c::TlsGetValue(self.key()).cast() } - } - - #[inline] - fn key(&'static self) -> Key { + pub fn force(&'static self) -> Key { match self.key.load(Acquire) { 0 => unsafe { self.init() }, key => key - 1, @@ -141,17 +130,28 @@ impl StaticKey { } } -unsafe impl Send for StaticKey {} -unsafe impl Sync for StaticKey {} +unsafe impl Send for LazyKey {} +unsafe impl Sync for LazyKey {} + +#[inline] +pub unsafe fn set(key: Key, val: *mut u8) { + let r = unsafe { c::TlsSetValue(key, val.cast()) }; + debug_assert_eq!(r, c::TRUE); +} + +#[inline] +pub unsafe fn get(key: Key) -> *mut u8 { + unsafe { c::TlsGetValue(key).cast() } +} -static DTORS: AtomicPtr = AtomicPtr::new(ptr::null_mut()); +static DTORS: AtomicPtr = AtomicPtr::new(ptr::null_mut()); /// Should only be called once per key, otherwise loops or breaks may occur in /// the linked list. -unsafe fn register_dtor(key: &'static StaticKey) { +unsafe fn register_dtor(key: &'static LazyKey) { guard::enable(); - let this = <*const StaticKey>::cast_mut(key); + let this = <*const LazyKey>::cast_mut(key); // Use acquire ordering to pass along the changes done by the previously // registered keys when we store the new head with release ordering. let mut head = DTORS.load(Acquire); @@ -176,9 +176,9 @@ pub unsafe fn run_dtors() { let dtor = unsafe { (*cur).dtor.unwrap() }; cur = unsafe { (*cur).next.load(Relaxed) }; - // In StaticKey::init, we register the dtor before setting `key`. + // In LazyKey::init, we register the dtor before setting `key`. // So if one thread's `run_dtors` races with another thread executing `init` on the same - // `StaticKey`, we can encounter a key of 0 here. That means this key was never + // `LazyKey`, we can encounter a key of 0 here. That means this key was never // initialized in this thread so we can safely skip it. if pre_key == 0 { continue; diff --git a/std/src/sys/thread_local/key/xous.rs b/std/src/sys/thread_local/key/xous.rs index a23f6de95f7b5..5a837a33e190e 100644 --- a/std/src/sys/thread_local/key/xous.rs +++ b/std/src/sys/thread_local/key/xous.rs @@ -30,7 +30,7 @@ //! really. //! //! Perhaps one day we can fold the `Box` here into a static allocation, -//! expanding the `StaticKey` structure to contain not only a slot for the TLS +//! expanding the `LazyKey` structure to contain not only a slot for the TLS //! key but also a slot for the destructor queue on windows. An optimization for //! another day! diff --git a/std/src/sys/thread_local/mod.rs b/std/src/sys/thread_local/mod.rs index f74fd828cbe50..3d1b91a7ea095 100644 --- a/std/src/sys/thread_local/mod.rs +++ b/std/src/sys/thread_local/mod.rs @@ -36,7 +36,7 @@ cfg_if::cfg_if! { pub use native::{EagerStorage, LazyStorage, thread_local_inner}; } else { mod os; - pub use os::{Key, thread_local_inner}; + pub use os::{Storage, thread_local_inner}; } } @@ -126,28 +126,33 @@ pub(crate) mod key { mod unix; #[cfg(test)] mod tests; - pub(super) use racy::StaticKey; - use unix::{Key, create, destroy, get, set}; + pub(super) use racy::LazyKey; + pub(super) use unix::{Key, set}; + #[cfg(any(not(target_thread_local), test))] + pub(super) use unix::get; + use unix::{create, destroy}; } else if #[cfg(all(not(target_thread_local), target_os = "windows"))] { #[cfg(test)] mod tests; mod windows; - pub(super) use windows::{StaticKey, run_dtors}; + pub(super) use windows::{Key, LazyKey, get, run_dtors, set}; } else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] { mod racy; mod sgx; #[cfg(test)] mod tests; - pub(super) use racy::StaticKey; - use sgx::{Key, create, destroy, get, set}; + pub(super) use racy::LazyKey; + pub(super) use sgx::{Key, get, set}; + use sgx::{create, destroy}; } else if #[cfg(target_os = "xous")] { mod racy; #[cfg(test)] mod tests; mod xous; - pub(super) use racy::StaticKey; + pub(super) use racy::LazyKey; pub(crate) use xous::destroy_tls; - use xous::{Key, create, destroy, get, set}; + pub(super) use xous::{Key, get, set}; + use xous::{create, destroy}; } } } diff --git a/std/src/sys/thread_local/os.rs b/std/src/sys/thread_local/os.rs index 6980c897fdb53..625943bb25512 100644 --- a/std/src/sys/thread_local/os.rs +++ b/std/src/sys/thread_local/os.rs @@ -2,7 +2,7 @@ use super::abort_on_dtor_unwind; use crate::cell::Cell; use crate::marker::PhantomData; use crate::ptr; -use crate::sys::thread_local::key::StaticKey as OsKey; +use crate::sys::thread_local::key::{get, set, Key, LazyKey}; #[doc(hidden)] #[allow_internal_unstable(thread_local_internals)] @@ -22,12 +22,12 @@ pub macro thread_local_inner { unsafe { use $crate::thread::LocalKey; - use $crate::thread::local_impl::Key; + use $crate::thread::local_impl::Storage; // Inlining does not work on windows-gnu due to linking errors around // dllimports. See https://github.com/rust-lang/rust/issues/109797. LocalKey::new(#[cfg_attr(windows, inline(never))] |init| { - static VAL: Key<$t> = Key::new(); + static VAL: Storage<$t> = Storage::new(); VAL.get(init, __init) }) } @@ -41,22 +41,22 @@ pub macro thread_local_inner { /// Use a regular global static to store this key; the state provided will then be /// thread-local. #[allow(missing_debug_implementations)] -pub struct Key { - os: OsKey, +pub struct Storage { + key: LazyKey, marker: PhantomData>, } -unsafe impl Sync for Key {} +unsafe impl Sync for Storage {} struct Value { value: T, - key: &'static Key, + key: Key, } -impl Key { +impl Storage { #[rustc_const_unstable(feature = "thread_local_internals", issue = "none")] - pub const fn new() -> Key { - Key { os: OsKey::new(Some(destroy_value::)), marker: PhantomData } + pub const fn new() -> Storage { + Storage { key: LazyKey::new(Some(destroy_value::)), marker: PhantomData } } /// Get a pointer to the TLS value, potentially initializing it with the @@ -66,19 +66,19 @@ impl Key { /// The resulting pointer may not be used after reentrant inialialization /// or thread destruction has occurred. pub fn get(&'static self, i: Option<&mut Option>, f: impl FnOnce() -> T) -> *const T { - // SAFETY: (FIXME: get should actually be safe) - let ptr = unsafe { self.os.get() as *mut Value }; + let key = self.key.force(); + let ptr = unsafe { get(key) as *mut Value }; if ptr.addr() > 1 { // SAFETY: the check ensured the pointer is safe (its destructor // is not running) + it is coming from a trusted source (self). unsafe { &(*ptr).value } } else { - self.try_initialize(ptr, i, f) + unsafe { Self::try_initialize(key, ptr, i, f) } } } - fn try_initialize( - &'static self, + unsafe fn try_initialize( + key: Key, ptr: *mut Value, i: Option<&mut Option>, f: impl FnOnce() -> T, @@ -88,13 +88,13 @@ impl Key { return ptr::null(); } - let value = i.and_then(Option::take).unwrap_or_else(f); - let ptr = Box::into_raw(Box::new(Value { value, key: self })); - // SAFETY: (FIXME: get should actually be safe) - let old = unsafe { self.os.get() as *mut Value }; + let value = Box::new(Value { value: i.and_then(Option::take).unwrap_or_else(f), key }); + let ptr = Box::into_raw(value); + + let old = unsafe { get(key) as *mut Value }; // SAFETY: `ptr` is a correct pointer that can be destroyed by the key destructor. unsafe { - self.os.set(ptr as *mut u8); + set(key, ptr as *mut u8); } if !old.is_null() { // If the variable was recursively initialized, drop the old value. @@ -123,8 +123,8 @@ unsafe extern "C" fn destroy_value(ptr: *mut u8) { abort_on_dtor_unwind(|| { let ptr = unsafe { Box::from_raw(ptr as *mut Value) }; let key = ptr.key; - unsafe { key.os.set(ptr::without_provenance_mut(1)) }; + unsafe { set(key, ptr::without_provenance_mut(1)) }; drop(ptr); - unsafe { key.os.set(ptr::null_mut()) }; + unsafe { set(key, ptr::null_mut()) }; }); }