From f13ff4bad951dc6dee527a2dc13db0065c19a47e Mon Sep 17 00:00:00 2001 From: joboet Date: Mon, 25 Nov 2024 13:01:35 +0100 Subject: [PATCH] std: lazily allocate the main thread handle #123550 eliminated the allocation of the main thread handle, but at the cost of greatly increased complexity. This PR proposes another approach: instead of creating the main thread handle itself, the runtime simply remembers the thread ID of the main thread. The main thread handle is then only allocated when it is used, using the same lazy-initialization mechanism as for non-runtime use of `thread::current`, and the `name` method uses the thread ID to identify the main thread handle and return the correct name ("main") for it. This also allows accessing `thread::current` before main: as the runtime no longer tries to install its own handle, doing so will no longer trigger an abort. Instead, the name returned from `name` will only be "main" after the runtime initialization code has run, but I think that is acceptable. This new approach also requires some changes to the signal handling code, as calling `thread::current` would now allocate when called on the main thread, which is not acceptable. I fixed this by adding a new function (`with_current_name`) that performs all the naming logic without allocating and without initializing the thread ID (which could allocate on some platforms). 
--- library/std/src/panicking.rs | 7 +- library/std/src/rt.rs | 23 +-- .../std/src/sys/pal/unix/stack_overflow.rs | 9 +- .../std/src/sys/pal/windows/stack_overflow.rs | 8 +- library/std/src/thread/current.rs | 36 ++--- library/std/src/thread/mod.rs | 150 ++++++++++++------ 6 files changed, 134 insertions(+), 99 deletions(-) diff --git a/library/std/src/panicking.rs b/library/std/src/panicking.rs index ac1f547c9143f..f2904688b5b40 100644 --- a/library/std/src/panicking.rs +++ b/library/std/src/panicking.rs @@ -247,15 +247,16 @@ fn default_hook(info: &PanicHookInfo<'_>) { let location = info.location().unwrap(); let msg = payload_as_str(info.payload()); - let thread = thread::try_current(); - let name = thread.as_ref().and_then(|t| t.name()).unwrap_or(""); let write = #[optimize(size)] |err: &mut dyn crate::io::Write| { // Use a lock to prevent mixed output in multithreading context. // Some platforms also require it when printing a backtrace, like `SymFromAddr` on Windows. let mut lock = backtrace::lock(); - let _ = writeln!(err, "thread '{name}' panicked at {location}:\n{msg}"); + thread::with_current_name(|name| { + let name = name.unwrap_or(""); + let _ = writeln!(err, "thread '{name}' panicked at {location}:\n{msg}"); + }); static FIRST_PANIC: AtomicBool = AtomicBool::new(true); diff --git a/library/std/src/rt.rs b/library/std/src/rt.rs index 80e7c3c026bd7..4301f509ba32f 100644 --- a/library/std/src/rt.rs +++ b/library/std/src/rt.rs @@ -23,7 +23,7 @@ pub use core::panicking::{panic_display, panic_fmt}; #[rustfmt::skip] use crate::any::Any; use crate::sync::Once; -use crate::thread::{self, Thread}; +use crate::thread::{self, main_thread}; use crate::{mem, panic, sys}; // Prints to the "panic output", depending on the platform this may be: @@ -102,24 +102,9 @@ unsafe fn init(argc: isize, argv: *const *const u8, sigpipe: u8) { sys::init(argc, argv, sigpipe) }; - // Set up the current thread handle to give it the right name. 
- // - // When code running before main uses `ReentrantLock` (for example by - // using `println!`), the thread ID can become initialized before we - // create this handle. Since `set_current` fails when the ID of the - // handle does not match the current ID, we should attempt to use the - // current thread ID here instead of unconditionally creating a new - // one. Also see #130210. - let thread = Thread::new_main(thread::current_id()); - if let Err(_thread) = thread::set_current(thread) { - // `thread::current` will create a new handle if none has been set yet. - // Thus, if someone uses it before main, this call will fail. That's a - // bad idea though, as we then cannot set the main thread name here. - // - // FIXME: detect the main thread in `thread::current` and use the - // correct name there. - rtabort!("code running before main must not use thread::current"); - } + // Remember the main thread ID to give it the correct name. + // SAFETY: this is the only time and place where we call this function. + unsafe { main_thread::set(thread::current_id()) }; } /// Clean up the thread-local runtime state. This *should* be run after all other diff --git a/library/std/src/sys/pal/unix/stack_overflow.rs b/library/std/src/sys/pal/unix/stack_overflow.rs index 69b31da427fcb..db5c6bd3a1c32 100644 --- a/library/std/src/sys/pal/unix/stack_overflow.rs +++ b/library/std/src/sys/pal/unix/stack_overflow.rs @@ -100,10 +100,11 @@ mod imp { // If the faulting address is within the guard page, then we print a // message saying so and abort. if start <= addr && addr < end { - rtprintpanic!( - "\nthread '{}' has overflowed its stack\n", - thread::current().name().unwrap_or("") - ); + thread::with_current_name(|name| { + let name = name.unwrap_or(""); + rtprintpanic!("\nthread '{name}' has overflowed its stack\n"); + }); + rtabort!("stack overflow"); } else { // Unregister ourselves by reverting back to the default behavior. 
diff --git a/library/std/src/sys/pal/windows/stack_overflow.rs b/library/std/src/sys/pal/windows/stack_overflow.rs index 467e21ab56a28..734cd30bed08f 100644 --- a/library/std/src/sys/pal/windows/stack_overflow.rs +++ b/library/std/src/sys/pal/windows/stack_overflow.rs @@ -18,10 +18,10 @@ unsafe extern "system" fn vectored_handler(ExceptionInfo: *mut c::EXCEPTION_POIN let code = rec.ExceptionCode; if code == c::EXCEPTION_STACK_OVERFLOW { - rtprintpanic!( - "\nthread '{}' has overflowed its stack\n", - thread::current().name().unwrap_or("") - ); + thread::with_current_name(|name| { + let name = name.unwrap_or(""); + rtprintpanic!("\nthread '{name}' has overflowed its stack\n"); + }); } c::EXCEPTION_CONTINUE_SEARCH } diff --git a/library/std/src/thread/current.rs b/library/std/src/thread/current.rs index b9b959f98946b..a2a780790e21b 100644 --- a/library/std/src/thread/current.rs +++ b/library/std/src/thread/current.rs @@ -15,7 +15,7 @@ local_pointer! { /// /// We store the thread ID so that it never gets destroyed during the lifetime /// of a thread, either using `#[thread_local]` or multiple `local_pointer!`s. -mod id { +pub(super) mod id { use super::*; cfg_if::cfg_if! 
{ @@ -27,7 +27,7 @@ mod id { pub(super) const CHEAP: bool = true; - pub(super) fn get() -> Option { + pub(crate) fn get() -> Option { ID.get() } @@ -44,7 +44,7 @@ mod id { pub(super) const CHEAP: bool = false; - pub(super) fn get() -> Option { + pub(crate) fn get() -> Option { let id0 = ID0.get().addr() as u64; let id16 = ID16.get().addr() as u64; let id32 = ID32.get().addr() as u64; @@ -67,7 +67,7 @@ mod id { pub(super) const CHEAP: bool = false; - pub(super) fn get() -> Option { + pub(crate) fn get() -> Option { let id0 = ID0.get().addr() as u64; let id32 = ID32.get().addr() as u64; ThreadId::from_u64((id32 << 32) + id0) @@ -85,7 +85,7 @@ mod id { pub(super) const CHEAP: bool = true; - pub(super) fn get() -> Option { + pub(crate) fn get() -> Option { let id = ID.get().addr() as u64; ThreadId::from_u64(id) } @@ -112,7 +112,7 @@ mod id { /// Tries to set the thread handle for the current thread. Fails if a handle was /// already set or if the thread ID of `thread` would change an already-set ID. -pub(crate) fn set_current(thread: Thread) -> Result<(), Thread> { +pub(super) fn set_current(thread: Thread) -> Result<(), Thread> { if CURRENT.get() != NONE { return Err(thread); } @@ -140,28 +140,28 @@ pub(crate) fn current_id() -> ThreadId { // to retrieve it from the current thread handle, which will only take one // TLS access. if !id::CHEAP { - let current = CURRENT.get(); - if current > DESTROYED { - unsafe { - let current = ManuallyDrop::new(Thread::from_raw(current)); - return current.id(); - } + if let Some(id) = try_with_current(|t| t.map(|t| t.id())) { + return id; } } id::get_or_init() } -/// Gets a handle to the thread that invokes it, if the handle has been initialized. -pub(crate) fn try_current() -> Option { +/// Gets a reference to the handle of the thread that invokes it, if the handle +/// has been initialized. 
+pub(super) fn try_with_current(f: F) -> R +where + F: FnOnce(Option<&Thread>) -> R, +{ let current = CURRENT.get(); if current > DESTROYED { unsafe { let current = ManuallyDrop::new(Thread::from_raw(current)); - Some((*current).clone()) + f(Some(&current)) } } else { - None + f(None) } } @@ -176,7 +176,7 @@ pub(crate) fn current_or_unnamed() -> Thread { (*current).clone() } } else if current == DESTROYED { - Thread::new_unnamed(id::get_or_init()) + Thread::new(id::get_or_init(), None) } else { init_current(current) } @@ -221,7 +221,7 @@ fn init_current(current: *mut ()) -> Thread { CURRENT.set(BUSY); // If the thread ID was initialized already, use it. let id = id::get_or_init(); - let thread = Thread::new_unnamed(id); + let thread = Thread::new(id, None); // Make sure that `crate::rt::thread_cleanup` will be run, which will // call `drop_current`. diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 320372b9c1557..38b8c1ddcd3bf 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -183,7 +183,8 @@ mod current; #[stable(feature = "rust1", since = "1.0.0")] pub use current::current; -pub(crate) use current::{current_id, current_or_unnamed, drop_current, set_current, try_current}; +pub(crate) use current::{current_id, current_or_unnamed, drop_current}; +use current::{set_current, try_with_current}; mod spawnhook; @@ -495,10 +496,7 @@ impl Builder { }); let id = ThreadId::new(); - let my_thread = match name { - Some(name) => Thread::new(id, name.into()), - None => Thread::new_unnamed(id), - }; + let my_thread = Thread::new(id, name); let hooks = if no_hooks { spawnhook::ChildSpawnHooks::default() @@ -1228,7 +1226,7 @@ impl ThreadId { } } - #[cfg(not(target_thread_local))] + #[cfg(any(not(target_thread_local), target_has_atomic = "64"))] fn from_u64(v: u64) -> Option { NonZero::new(v).map(ThreadId) } @@ -1252,30 +1250,16 @@ impl ThreadId { // Thread 
//////////////////////////////////////////////////////////////////////////////// -/// The internal representation of a `Thread`'s name. -enum ThreadName { - Main, - Other(ThreadNameString), - Unnamed, -} - // This module ensures private fields are kept private, which is necessary to enforce the safety requirements. mod thread_name_string { - use core::str; - - use super::ThreadName; use crate::ffi::{CStr, CString}; + use crate::str; /// Like a `String` it's guaranteed UTF-8 and like a `CString` it's null terminated. pub(crate) struct ThreadNameString { inner: CString, } - impl core::ops::Deref for ThreadNameString { - type Target = CStr; - fn deref(&self) -> &CStr { - &self.inner - } - } + impl From for ThreadNameString { fn from(s: String) -> Self { Self { @@ -1283,27 +1267,91 @@ mod thread_name_string { } } } - impl ThreadName { - pub fn as_cstr(&self) -> Option<&CStr> { - match self { - ThreadName::Main => Some(c"main"), - ThreadName::Other(other) => Some(other), - ThreadName::Unnamed => None, - } + + impl ThreadNameString { + pub fn as_cstr(&self) -> &CStr { + &self.inner } - pub fn as_str(&self) -> Option<&str> { - // SAFETY: `as_cstr` can only return `Some` for a fixed CStr or a `ThreadNameString`, - // which is guaranteed to be UTF-8. - self.as_cstr().map(|s| unsafe { str::from_utf8_unchecked(s.to_bytes()) }) + pub fn as_str(&self) -> &str { + // SAFETY: `ThreadNameString` is guaranteed to be UTF-8. + unsafe { str::from_utf8_unchecked(self.inner.to_bytes()) } + } + } +} + +use thread_name_string::ThreadNameString; + +pub(crate) mod main_thread { + cfg_if::cfg_if! { + if #[cfg(target_has_atomic = "64")] { + use super::ThreadId; + use crate::sync::atomic::AtomicU64; + use crate::sync::atomic::Ordering::Relaxed; + + static MAIN: AtomicU64 = AtomicU64::new(0); + + pub(super) fn get() -> Option { + ThreadId::from_u64(MAIN.load(Relaxed)) + } + + /// # Safety + /// May only be called once. 
+ pub(crate) unsafe fn set(id: ThreadId) { + MAIN.store(id.as_u64().get(), Relaxed) + } + } else { + use super::ThreadId; + use crate::mem::MaybeUninit; + use crate::sync::atomic::AtomicBool; + use crate::sync::atomic::Ordering::{Acquire, Release}; + + static INIT: AtomicBool = AtomicBool::new(false); + static mut MAIN: MaybeUninit = MaybeUninit::uninit(); + + pub(super) fn get() -> Option { + if INIT.load(Acquire) { + Some(unsafe { MAIN.assume_init() }) + } else { + None + } + } + + /// # Safety + /// May only be called once. + pub(crate) unsafe fn set(id: ThreadId) { + unsafe { MAIN = MaybeUninit::new(id) }; + INIT.store(true, Release); + } } } } -pub(crate) use thread_name_string::ThreadNameString; + +pub(crate) fn with_current_name(f: F) -> R +where + F: FnOnce(Option<&str>) -> R, +{ + try_with_current(|thread| { + if let Some(thread) = thread { + if let Some(name) = &thread.inner.name { + return f(Some(name.as_str())); + } else if Some(thread.inner.id) == main_thread::get() { + return f(Some("main")); + } + } else if let Some(main) = main_thread::get() + && let Some(id) = current::id::get() + && id == main + { + return f(Some("main")); + } + + f(None) + }) +} /// The internal representation of a `Thread` handle struct Inner { - name: ThreadName, // Guaranteed to be UTF-8 + name: Option, id: ThreadId, parker: Parker, } @@ -1339,21 +1387,9 @@ pub struct Thread { } impl Thread { - /// Used only internally to construct a thread object without spawning. - pub(crate) fn new(id: ThreadId, name: String) -> Thread { - Self::new_inner(id, ThreadName::Other(name.into())) - } - - pub(crate) fn new_unnamed(id: ThreadId) -> Thread { - Self::new_inner(id, ThreadName::Unnamed) - } - - /// Constructs the thread handle for the main thread. 
- pub(crate) fn new_main(id: ThreadId) -> Thread { - Self::new_inner(id, ThreadName::Main) - } + pub(crate) fn new(id: ThreadId, name: Option) -> Thread { + let name = name.map(ThreadNameString::from); - fn new_inner(id: ThreadId, name: ThreadName) -> Thread { // We have to use `unsafe` here to construct the `Parker` in-place, // which is required for the UNIX implementation. // @@ -1485,7 +1521,13 @@ impl Thread { #[stable(feature = "rust1", since = "1.0.0")] #[must_use] pub fn name(&self) -> Option<&str> { - self.inner.name.as_str() + if let Some(name) = &self.inner.name { + Some(name.as_str()) + } else if main_thread::get() == Some(self.inner.id) { + Some("main") + } else { + None + } } /// Consumes the `Thread`, returning a raw pointer. @@ -1537,7 +1579,13 @@ impl Thread { } fn cname(&self) -> Option<&CStr> { - self.inner.name.as_cstr() + if let Some(name) = &self.inner.name { + Some(name.as_cstr()) + } else if main_thread::get() == Some(self.inner.id) { + Some(c"main") + } else { + None + } } }