From 6e0725e95b8f6f68cfeced7840044cc433dcc96a Mon Sep 17 00:00:00 2001 From: joboet Date: Mon, 25 Nov 2024 09:50:24 +0100 Subject: [PATCH 1/4] Revert "Remove the Arc rt::init allocation for thread info" This reverts commit 0747f2898e83df7e601189c0f31762e84328becb. --- library/std/src/lib.rs | 1 - library/std/src/rt.rs | 2 +- library/std/src/thread/mod.rs | 170 ++++++++------------------ tests/debuginfo/thread.rs | 8 +- tests/rustdoc/demo-allocator-54478.rs | 1 - 5 files changed, 58 insertions(+), 124 deletions(-) diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index cf99a618e5520..f2d11b1af7734 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -360,7 +360,6 @@ #![feature(std_internals)] #![feature(str_internals)] #![feature(strict_provenance_atomic_ptr)] -#![feature(sync_unsafe_cell)] #![feature(ub_checks)] // tidy-alphabetical-end // diff --git a/library/std/src/rt.rs b/library/std/src/rt.rs index b2492238bd37b..80e7c3c026bd7 100644 --- a/library/std/src/rt.rs +++ b/library/std/src/rt.rs @@ -110,7 +110,7 @@ unsafe fn init(argc: isize, argv: *const *const u8, sigpipe: u8) { // handle does not match the current ID, we should attempt to use the // current thread ID here instead of unconditionally creating a new // one. Also see #130210. - let thread = unsafe { Thread::new_main(thread::current_id()) }; + let thread = Thread::new_main(thread::current_id()); if let Err(_thread) = thread::set_current(thread) { // `thread::current` will create a new handle if none has been set yet. // Thus, if someone uses it before main, this call will fail. 
That's a diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 2ff44fcd4c6b7..320372b9c1557 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -158,12 +158,9 @@ #[cfg(all(test, not(any(target_os = "emscripten", target_os = "wasi"))))] mod tests; -use core::cell::SyncUnsafeCell; -use core::ffi::CStr; -use core::mem::MaybeUninit; - use crate::any::Any; use crate::cell::UnsafeCell; +use crate::ffi::CStr; use crate::marker::PhantomData; use crate::mem::{self, ManuallyDrop, forget}; use crate::num::NonZero; @@ -1255,31 +1252,30 @@ impl ThreadId { // Thread //////////////////////////////////////////////////////////////////////////////// +/// The internal representation of a `Thread`'s name. +enum ThreadName { + Main, + Other(ThreadNameString), + Unnamed, +} + // This module ensures private fields are kept private, which is necessary to enforce the safety requirements. mod thread_name_string { use core::str; + use super::ThreadName; use crate::ffi::{CStr, CString}; /// Like a `String` it's guaranteed UTF-8 and like a `CString` it's null terminated. pub(crate) struct ThreadNameString { inner: CString, } - - impl ThreadNameString { - pub fn as_str(&self) -> &str { - // SAFETY: `self.inner` is only initialised via `String`, which upholds the validity invariant of `str`. 
- unsafe { str::from_utf8_unchecked(self.inner.to_bytes()) } - } - } - impl core::ops::Deref for ThreadNameString { type Target = CStr; fn deref(&self) -> &CStr { &self.inner } } - impl From<String> for ThreadNameString { fn from(s: String) -> Self { Self { @@ -1287,82 +1283,34 @@ mod thread_name_string { } } } + impl ThreadName { + pub fn as_cstr(&self) -> Option<&CStr> { + match self { + ThreadName::Main => Some(c"main"), + ThreadName::Other(other) => Some(other), + ThreadName::Unnamed => None, + } + } + + pub fn as_str(&self) -> Option<&str> { + // SAFETY: `as_cstr` can only return `Some` for a fixed CStr or a `ThreadNameString`, + // which is guaranteed to be UTF-8. + self.as_cstr().map(|s| unsafe { str::from_utf8_unchecked(s.to_bytes()) }) + } + } } pub(crate) use thread_name_string::ThreadNameString; -static MAIN_THREAD_INFO: SyncUnsafeCell<(MaybeUninit<ThreadId>, MaybeUninit<Parker>)> = - SyncUnsafeCell::new((MaybeUninit::uninit(), MaybeUninit::uninit())); - -/// The internal representation of a `Thread` that is not the main thread. -struct OtherInner { - name: Option<ThreadNameString>, +/// The internal representation of a `Thread` handle +struct Inner { + name: ThreadName, // Guaranteed to be UTF-8 id: ThreadId, parker: Parker, } -/// The internal representation of a `Thread` handle. -#[derive(Clone)] -enum Inner { - /// Represents the main thread. May only be constructed by Thread::new_main. - Main(&'static (ThreadId, Parker)), - /// Represents any other thread. 
- Other(Pin<Arc<OtherInner>>), -} - impl Inner { - fn id(&self) -> ThreadId { - match self { - Self::Main((thread_id, _)) => *thread_id, - Self::Other(other) => other.id, - } - } - - fn cname(&self) -> Option<&CStr> { - match self { - Self::Main(_) => Some(c"main"), - Self::Other(other) => other.name.as_deref(), - } - } - - fn name(&self) -> Option<&str> { - match self { - Self::Main(_) => Some("main"), - Self::Other(other) => other.name.as_ref().map(ThreadNameString::as_str), - } - } - - fn into_raw(self) -> *const () { - match self { - // Just return the pointer to `MAIN_THREAD_INFO`. - Self::Main(ptr) => crate::ptr::from_ref(ptr).cast(), - Self::Other(arc) => { - // Safety: We only expose an opaque pointer, which maintains the `Pin` invariant. - let inner = unsafe { Pin::into_inner_unchecked(arc) }; - Arc::into_raw(inner) as *const () - } - } - } - - /// # Safety - /// - /// See [`Thread::from_raw`]. - unsafe fn from_raw(ptr: *const ()) -> Self { - // If the pointer is to `MAIN_THREAD_INFO`, we know it is the `Main` variant. - if crate::ptr::eq(ptr.cast(), &MAIN_THREAD_INFO) { - Self::Main(unsafe { &*ptr.cast() }) - } else { - // Safety: Upheld by caller - Self::Other(unsafe { Pin::new_unchecked(Arc::from_raw(ptr as *const OtherInner)) }) - } - } - - fn parker(&self) -> Pin<&Parker> { - match self { - Self::Main((_, parker_ref)) => Pin::static_ref(parker_ref), - Self::Other(inner) => unsafe { - Pin::map_unchecked(inner.as_ref(), |inner| &inner.parker) - }, - } + fn parker(self: Pin<&Self>) -> Pin<&Parker> { + unsafe { Pin::map_unchecked(self, |inner| &inner.parker) } } } @@ -1386,47 +1334,33 @@ impl Inner { /// docs of [`Builder`] and [`spawn`] for more details. /// /// [`thread::current`]: current::current -pub struct Thread(Inner); +pub struct Thread { + inner: Pin<Arc<Inner>>, +} impl Thread { /// Used only internally to construct a thread object without spawning. 
pub(crate) fn new(id: ThreadId, name: String) -> Thread { - Self::new_inner(id, Some(ThreadNameString::from(name))) + Self::new_inner(id, ThreadName::Other(name.into())) } pub(crate) fn new_unnamed(id: ThreadId) -> Thread { - Self::new_inner(id, None) + Self::new_inner(id, ThreadName::Unnamed) } - /// Used in runtime to construct main thread - /// - /// # Safety - /// - /// This must only ever be called once, and must be called on the main thread. - pub(crate) unsafe fn new_main(thread_id: ThreadId) -> Thread { - // Safety: As this is only called once and on the main thread, nothing else is accessing MAIN_THREAD_INFO - // as the only other read occurs in `main_thread_info` *after* the main thread has been constructed, - // and this function is the only one that constructs the main thread. - // - // Pre-main thread spawning cannot hit this either, as the caller promises that this is only called on the main thread. - let main_thread_info = unsafe { &mut *MAIN_THREAD_INFO.get() }; - - unsafe { Parker::new_in_place((&raw mut main_thread_info.1).cast()) }; - main_thread_info.0.write(thread_id); - - // Store a `'static` ref to the initialised ThreadId and Parker, - // to avoid having to repeatedly prove initialisation. - Self(Inner::Main(unsafe { &*MAIN_THREAD_INFO.get().cast() })) + /// Constructs the thread handle for the main thread. + pub(crate) fn new_main(id: ThreadId) -> Thread { + Self::new_inner(id, ThreadName::Main) } - fn new_inner(id: ThreadId, name: Option<ThreadNameString>) -> Thread { + fn new_inner(id: ThreadId, name: ThreadName) -> Thread { // We have to use `unsafe` here to construct the `Parker` in-place, // which is required for the UNIX implementation. // // SAFETY: We pin the Arc immediately after creation, so its address never // changes. 
let inner = unsafe { - let mut arc = Arc::<OtherInner>::new_uninit(); + let mut arc = Arc::<Inner>::new_uninit(); let ptr = Arc::get_mut_unchecked(&mut arc).as_mut_ptr(); (&raw mut (*ptr).name).write(name); (&raw mut (*ptr).id).write(id); @@ -1434,7 +1368,7 @@ impl Thread { Pin::new_unchecked(arc.assume_init()) }; - Self(Inner::Other(inner)) + Thread { inner } } /// Like the public [`park`], but callable on any handle. This is used to @@ -1443,7 +1377,7 @@ impl Thread { /// # Safety /// May only be called from the thread to which this handle belongs. pub(crate) unsafe fn park(&self) { - unsafe { self.0.parker().park() } + unsafe { self.inner.as_ref().parker().park() } } /// Like the public [`park_timeout`], but callable on any handle. This is @@ -1452,7 +1386,7 @@ impl Thread { /// # Safety /// May only be called from the thread to which this handle belongs. pub(crate) unsafe fn park_timeout(&self, dur: Duration) { - unsafe { self.0.parker().park_timeout(dur) } + unsafe { self.inner.as_ref().parker().park_timeout(dur) } } /// Atomically makes the handle's token available if it is not already. @@ -1488,7 +1422,7 @@ impl Thread { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn unpark(&self) { - self.0.parker().unpark(); + self.inner.as_ref().parker().unpark(); } /// Gets the thread's unique identifier. @@ -1508,7 +1442,7 @@ impl Thread { #[stable(feature = "thread_id", since = "1.19.0")] #[must_use] pub fn id(&self) -> ThreadId { - self.0.id() + self.inner.id } /// Gets the thread's name. @@ -1551,11 +1485,7 @@ impl Thread { #[stable(feature = "rust1", since = "1.0.0")] #[must_use] pub fn name(&self) -> Option<&str> { - self.0.name() - } - - fn cname(&self) -> Option<&CStr> { - self.0.cname() + self.inner.name.as_str() } /// Consumes the `Thread`, returning a raw pointer. 
@@ -1579,7 +1509,9 @@ impl Thread { /// ``` #[unstable(feature = "thread_raw", issue = "97523")] pub fn into_raw(self) -> *const () { - self.0.into_raw() + // Safety: We only expose an opaque pointer, which maintains the `Pin` invariant. + let inner = unsafe { Pin::into_inner_unchecked(self.inner) }; + Arc::into_raw(inner) as *const () } /// Constructs a `Thread` from a raw pointer. @@ -1601,7 +1533,11 @@ impl Thread { #[unstable(feature = "thread_raw", issue = "97523")] pub unsafe fn from_raw(ptr: *const ()) -> Thread { // Safety: Upheld by caller. - unsafe { Thread(Inner::from_raw(ptr)) } + unsafe { Thread { inner: Pin::new_unchecked(Arc::from_raw(ptr as *const Inner)) } } + } + + fn cname(&self) -> Option<&CStr> { + self.inner.name.as_cstr() } } diff --git a/tests/debuginfo/thread.rs b/tests/debuginfo/thread.rs index dc8cb0832192b..0415f586f5d90 100644 --- a/tests/debuginfo/thread.rs +++ b/tests/debuginfo/thread.rs @@ -12,15 +12,15 @@ // cdb-check:join_handle,d [Type: std::thread::JoinHandle >] // cdb-check: [...] __0 [Type: std::thread::JoinInner >] // -// cdb-command:dx -r3 t,d +// cdb-command:dx t,d // cdb-check:t,d : [...] [Type: std::thread::Thread *] -// cdb-check: [...] __0 : Other [Type: enum2$] -// cdb-check: [...] __0 [Type: core::pin::Pin >] +// cdb-check:[...] inner [...][Type: core::pin::Pin >] use std::thread; #[allow(unused_variables)] -fn main() { +fn main() +{ let join_handle = thread::spawn(|| { println!("Initialize a thread"); }); diff --git a/tests/rustdoc/demo-allocator-54478.rs b/tests/rustdoc/demo-allocator-54478.rs index 80acfc0ff58a1..dd98e80f03ade 100644 --- a/tests/rustdoc/demo-allocator-54478.rs +++ b/tests/rustdoc/demo-allocator-54478.rs @@ -40,7 +40,6 @@ //! } //! //! fn main() { -//! drop(String::from("An allocation")); //! assert!(unsafe { HIT }); //! } //! 
``` From 54991e066c76c881e9b14bbbb374ae4fdd24f03f Mon Sep 17 00:00:00 2001 From: joboet Date: Mon, 25 Nov 2024 13:01:35 +0100 Subject: [PATCH 2/4] std: lazily allocate the main thread handle #123550 eliminated the allocation of the main thread handle, but at the cost of greatly increased complexity. This PR proposes another approach: Instead of creating the main thread handle itself, the runtime simply remembers the thread ID of the main thread. The main thread handle is then only allocated when it is used, using the same lazy-initialization mechanism as for non-runtime use of `thread::current`, and the `name` method uses the thread ID to identify the main thread handle and return the correct name ("main") for it. Thereby, we also allow accessing thread::current before main: as the runtime no longer tries to install its own handle, this will no longer trigger an abort. Rather, the name returned from name will only be "main" after the runtime initialization code has run, but I think that is acceptable. This new approach also requires some changes to the signal handling code, as calling `thread::current` would now allocate when called on the main thread, which is not acceptable. I fixed this by adding a new function (`with_current_name`) that performs all the naming logic without allocation or without initializing the thread ID (which could allocate on some platforms). 
--- library/std/src/panicking.rs | 7 +- library/std/src/rt.rs | 23 +-- .../std/src/sys/pal/unix/stack_overflow.rs | 9 +- .../std/src/sys/pal/windows/stack_overflow.rs | 8 +- library/std/src/thread/current.rs | 36 ++--- library/std/src/thread/mod.rs | 150 ++++++++++++------ 6 files changed, 134 insertions(+), 99 deletions(-) diff --git a/library/std/src/panicking.rs b/library/std/src/panicking.rs index ac1f547c9143f..f2904688b5b40 100644 --- a/library/std/src/panicking.rs +++ b/library/std/src/panicking.rs @@ -247,15 +247,16 @@ fn default_hook(info: &PanicHookInfo<'_>) { let location = info.location().unwrap(); let msg = payload_as_str(info.payload()); - let thread = thread::try_current(); - let name = thread.as_ref().and_then(|t| t.name()).unwrap_or("<unnamed>"); let write = #[optimize(size)] |err: &mut dyn crate::io::Write| { // Use a lock to prevent mixed output in multithreading context. // Some platforms also require it when printing a backtrace, like `SymFromAddr` on Windows. let mut lock = backtrace::lock(); - let _ = writeln!(err, "thread '{name}' panicked at {location}:\n{msg}"); + thread::with_current_name(|name| { + let name = name.unwrap_or("<unnamed>"); + let _ = writeln!(err, "thread '{name}' panicked at {location}:\n{msg}"); + }); static FIRST_PANIC: AtomicBool = AtomicBool::new(true); diff --git a/library/std/src/rt.rs b/library/std/src/rt.rs index 80e7c3c026bd7..4301f509ba32f 100644 --- a/library/std/src/rt.rs +++ b/library/std/src/rt.rs @@ -23,7 +23,7 @@ pub use core::panicking::{panic_display, panic_fmt}; #[rustfmt::skip] use crate::any::Any; use crate::sync::Once; -use crate::thread::{self, Thread}; +use crate::thread::{self, main_thread}; use crate::{mem, panic, sys}; // Prints to the "panic output", depending on the platform this may be: @@ -102,24 +102,9 @@ unsafe fn init(argc: isize, argv: *const *const u8, sigpipe: u8) { sys::init(argc, argv, sigpipe) }; - // Set up the current thread handle to give it the right name. 
- // - // When code running before main uses `ReentrantLock` (for example by - // using `println!`), the thread ID can become initialized before we - // create this handle. Since `set_current` fails when the ID of the - // handle does not match the current ID, we should attempt to use the - // current thread ID here instead of unconditionally creating a new - // one. Also see #130210. - let thread = Thread::new_main(thread::current_id()); - if let Err(_thread) = thread::set_current(thread) { - // `thread::current` will create a new handle if none has been set yet. - // Thus, if someone uses it before main, this call will fail. That's a - // bad idea though, as we then cannot set the main thread name here. - // - // FIXME: detect the main thread in `thread::current` and use the - // correct name there. - rtabort!("code running before main must not use thread::current"); - } + // Remember the main thread ID to give it the correct name. + // SAFETY: this is the only time and place where we call this function. + unsafe { main_thread::set(thread::current_id()) }; } /// Clean up the thread-local runtime state. This *should* be run after all other diff --git a/library/std/src/sys/pal/unix/stack_overflow.rs b/library/std/src/sys/pal/unix/stack_overflow.rs index 69b31da427fcb..db5c6bd3a1c32 100644 --- a/library/std/src/sys/pal/unix/stack_overflow.rs +++ b/library/std/src/sys/pal/unix/stack_overflow.rs @@ -100,10 +100,11 @@ mod imp { // If the faulting address is within the guard page, then we print a // message saying so and abort. if start <= addr && addr < end { - rtprintpanic!( - "\nthread '{}' has overflowed its stack\n", - thread::current().name().unwrap_or("<unknown>") - ); + thread::with_current_name(|name| { + let name = name.unwrap_or("<unknown>"); + rtprintpanic!("\nthread '{name}' has overflowed its stack\n"); + }); + rtabort!("stack overflow"); } else { // Unregister ourselves by reverting back to the default behavior. 
diff --git a/library/std/src/sys/pal/windows/stack_overflow.rs b/library/std/src/sys/pal/windows/stack_overflow.rs index 467e21ab56a28..734cd30bed08f 100644 --- a/library/std/src/sys/pal/windows/stack_overflow.rs +++ b/library/std/src/sys/pal/windows/stack_overflow.rs @@ -18,10 +18,10 @@ unsafe extern "system" fn vectored_handler(ExceptionInfo: *mut c::EXCEPTION_POIN let code = rec.ExceptionCode; if code == c::EXCEPTION_STACK_OVERFLOW { - rtprintpanic!( - "\nthread '{}' has overflowed its stack\n", - thread::current().name().unwrap_or("<unknown>") - ); + thread::with_current_name(|name| { + let name = name.unwrap_or("<unknown>"); + rtprintpanic!("\nthread '{name}' has overflowed its stack\n"); + }); } c::EXCEPTION_CONTINUE_SEARCH } diff --git a/library/std/src/thread/current.rs b/library/std/src/thread/current.rs index 1048ef973560e..052b42b4222d0 100644 --- a/library/std/src/thread/current.rs +++ b/library/std/src/thread/current.rs @@ -15,7 +15,7 @@ local_pointer! { /// /// We store the thread ID so that it never gets destroyed during the lifetime /// of a thread, either using `#[thread_local]` or multiple `local_pointer!`s. -mod id { +pub(super) mod id { use super::*; cfg_if::cfg_if! 
{ @@ -27,7 +27,7 @@ mod id { pub(super) const CHEAP: bool = true; - pub(super) fn get() -> Option<ThreadId> { + pub(crate) fn get() -> Option<ThreadId> { ID.get() } @@ -44,7 +44,7 @@ mod id { pub(super) const CHEAP: bool = false; - pub(super) fn get() -> Option<ThreadId> { + pub(crate) fn get() -> Option<ThreadId> { let id0 = ID0.get().addr() as u64; let id16 = ID16.get().addr() as u64; let id32 = ID32.get().addr() as u64; @@ -67,7 +67,7 @@ mod id { pub(super) const CHEAP: bool = false; - pub(super) fn get() -> Option<ThreadId> { + pub(crate) fn get() -> Option<ThreadId> { let id0 = ID0.get().addr() as u64; let id32 = ID32.get().addr() as u64; ThreadId::from_u64((id32 << 32) + id0) @@ -85,7 +85,7 @@ mod id { pub(super) const CHEAP: bool = true; - pub(super) fn get() -> Option<ThreadId> { + pub(crate) fn get() -> Option<ThreadId> { let id = ID.get().addr() as u64; ThreadId::from_u64(id) } @@ -112,7 +112,7 @@ mod id { /// Tries to set the thread handle for the current thread. Fails if a handle was /// already set or if the thread ID of `thread` would change an already-set ID. -pub(crate) fn set_current(thread: Thread) -> Result<(), Thread> { +pub(super) fn set_current(thread: Thread) -> Result<(), Thread> { if CURRENT.get() != NONE { return Err(thread); } @@ -140,28 +140,28 @@ pub(crate) fn current_id() -> ThreadId { // to retrieve it from the current thread handle, which will only take one // TLS access. if !id::CHEAP { - let current = CURRENT.get(); - if current > DESTROYED { - unsafe { - let current = ManuallyDrop::new(Thread::from_raw(current)); - return current.id(); - } + if let Some(id) = try_with_current(|t| t.map(|t| t.id())) { + return id; } } id::get_or_init() } -/// Gets a handle to the thread that invokes it, if the handle has been initialized. -pub(crate) fn try_current() -> Option<Thread> { +/// Gets a reference to the handle of the thread that invokes it, if the handle +/// has been initialized. 
+pub(super) fn try_with_current<F, R>(f: F) -> R +where + F: FnOnce(Option<&Thread>) -> R, +{ let current = CURRENT.get(); if current > DESTROYED { unsafe { let current = ManuallyDrop::new(Thread::from_raw(current)); - Some((*current).clone()) + f(Some(&current)) } } else { - None + f(None) } } @@ -176,7 +176,7 @@ pub(crate) fn current_or_unnamed() -> Thread { (*current).clone() } } else if current == DESTROYED { - Thread::new_unnamed(id::get_or_init()) + Thread::new(id::get_or_init(), None) } else { init_current(current) } @@ -221,7 +221,7 @@ fn init_current(current: *mut ()) -> Thread { CURRENT.set(BUSY); // If the thread ID was initialized already, use it. let id = id::get_or_init(); - let thread = Thread::new_unnamed(id); + let thread = Thread::new(id, None); // Make sure that `crate::rt::thread_cleanup` will be run, which will // call `drop_current`. diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 320372b9c1557..38b8c1ddcd3bf 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -183,7 +183,8 @@ mod current; #[stable(feature = "rust1", since = "1.0.0")] pub use current::current; -pub(crate) use current::{current_id, current_or_unnamed, drop_current, set_current, try_current}; +pub(crate) use current::{current_id, current_or_unnamed, drop_current}; +use current::{set_current, try_with_current}; mod spawnhook; @@ -495,10 +496,7 @@ impl Builder { }); let id = ThreadId::new(); - let my_thread = match name { - Some(name) => Thread::new(id, name.into()), - None => Thread::new_unnamed(id), - }; + let my_thread = Thread::new(id, name); let hooks = if no_hooks { spawnhook::ChildSpawnHooks::default() @@ -1228,7 +1226,7 @@ impl ThreadId { } } - #[cfg(not(target_thread_local))] + #[cfg(any(not(target_thread_local), target_has_atomic = "64"))] fn from_u64(v: u64) -> Option<ThreadId> { NonZero::new(v).map(ThreadId) } @@ -1252,30 +1250,16 @@ impl ThreadId { // Thread 
//////////////////////////////////////////////////////////////////////////////// -/// The internal representation of a `Thread`'s name. -enum ThreadName { - Main, - Other(ThreadNameString), - Unnamed, -} - // This module ensures private fields are kept private, which is necessary to enforce the safety requirements. mod thread_name_string { - use core::str; - - use super::ThreadName; use crate::ffi::{CStr, CString}; + use crate::str; /// Like a `String` it's guaranteed UTF-8 and like a `CString` it's null terminated. pub(crate) struct ThreadNameString { inner: CString, } - impl core::ops::Deref for ThreadNameString { - type Target = CStr; - fn deref(&self) -> &CStr { - &self.inner - } - } + impl From<String> for ThreadNameString { fn from(s: String) -> Self { Self { @@ -1283,27 +1267,91 @@ mod thread_name_string { } } } - impl ThreadName { - pub fn as_cstr(&self) -> Option<&CStr> { - match self { - ThreadName::Main => Some(c"main"), - ThreadName::Other(other) => Some(other), - ThreadName::Unnamed => None, - } + + impl ThreadNameString { + pub fn as_cstr(&self) -> &CStr { + &self.inner } - pub fn as_str(&self) -> Option<&str> { - // SAFETY: `as_cstr` can only return `Some` for a fixed CStr or a `ThreadNameString`, - // which is guaranteed to be UTF-8. - self.as_cstr().map(|s| unsafe { str::from_utf8_unchecked(s.to_bytes()) }) + pub fn as_str(&self) -> &str { + // SAFETY: `ThreadNameString` is guaranteed to be UTF-8. + unsafe { str::from_utf8_unchecked(self.inner.to_bytes()) } + } + } +} + +use thread_name_string::ThreadNameString; + +pub(crate) mod main_thread { + cfg_if::cfg_if! { + if #[cfg(target_has_atomic = "64")] { + use super::ThreadId; + use crate::sync::atomic::AtomicU64; + use crate::sync::atomic::Ordering::Relaxed; + + static MAIN: AtomicU64 = AtomicU64::new(0); + + pub(super) fn get() -> Option<ThreadId> { + ThreadId::from_u64(MAIN.load(Relaxed)) + } + + /// # Safety + /// May only be called once. 
+ pub(crate) unsafe fn set(id: ThreadId) { + MAIN.store(id.as_u64().get(), Relaxed) + } + } else { + use super::ThreadId; + use crate::mem::MaybeUninit; + use crate::sync::atomic::AtomicBool; + use crate::sync::atomic::Ordering::{Acquire, Release}; + + static INIT: AtomicBool = AtomicBool::new(false); + static mut MAIN: MaybeUninit<ThreadId> = MaybeUninit::uninit(); + + pub(super) fn get() -> Option<ThreadId> { + if INIT.load(Acquire) { + Some(unsafe { MAIN.assume_init() }) + } else { + None + } + } + + /// # Safety + /// May only be called once. + pub(crate) unsafe fn set(id: ThreadId) { + unsafe { MAIN = MaybeUninit::new(id) }; + INIT.store(true, Release); + } } } } -pub(crate) use thread_name_string::ThreadNameString; + +pub(crate) fn with_current_name<F, R>(f: F) -> R +where + F: FnOnce(Option<&str>) -> R, +{ + try_with_current(|thread| { + if let Some(thread) = thread { + if let Some(name) = &thread.inner.name { + return f(Some(name.as_str())); + } else if Some(thread.inner.id) == main_thread::get() { + return f(Some("main")); + } + } else if let Some(main) = main_thread::get() + && let Some(id) = current::id::get() + && id == main + { + return f(Some("main")); + } + + f(None) + }) +} /// The internal representation of a `Thread` handle struct Inner { - name: ThreadName, // Guaranteed to be UTF-8 + name: Option<ThreadNameString>, id: ThreadId, parker: Parker, } @@ -1339,21 +1387,9 @@ pub struct Thread { } impl Thread { - /// Used only internally to construct a thread object without spawning. - pub(crate) fn new(id: ThreadId, name: String) -> Thread { - Self::new_inner(id, ThreadName::Other(name.into())) - } - - pub(crate) fn new_unnamed(id: ThreadId) -> Thread { - Self::new_inner(id, ThreadName::Unnamed) - } - - /// Constructs the thread handle for the main thread. 
- pub(crate) fn new_main(id: ThreadId) -> Thread { - Self::new_inner(id, ThreadName::Main) - } + pub(crate) fn new(id: ThreadId, name: Option<String>) -> Thread { + let name = name.map(ThreadNameString::from); - fn new_inner(id: ThreadId, name: ThreadName) -> Thread { // We have to use `unsafe` here to construct the `Parker` in-place, // which is required for the UNIX implementation. // @@ -1485,7 +1521,13 @@ impl Thread { #[stable(feature = "rust1", since = "1.0.0")] #[must_use] pub fn name(&self) -> Option<&str> { - self.inner.name.as_str() + if let Some(name) = &self.inner.name { + Some(name.as_str()) + } else if main_thread::get() == Some(self.inner.id) { + Some("main") + } else { + None + } } /// Consumes the `Thread`, returning a raw pointer. @@ -1537,7 +1579,13 @@ impl Thread { } fn cname(&self) -> Option<&CStr> { - self.inner.name.as_cstr() + if let Some(name) = &self.inner.name { + Some(name.as_cstr()) + } else if main_thread::get() == Some(self.inner.id) { + Some(c"main") + } else { + None + } } } From 1861a0c58cafdfa771ff342ddd94126c6a0a3065 Mon Sep 17 00:00:00 2001 From: joboet Date: Mon, 25 Nov 2024 14:03:02 +0100 Subject: [PATCH 3/4] make sure that the allocator is actually called in allocator test Originally authored by GnomedDev --- tests/rustdoc/demo-allocator-54478.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/rustdoc/demo-allocator-54478.rs b/tests/rustdoc/demo-allocator-54478.rs index dd98e80f03ade..80acfc0ff58a1 100644 --- a/tests/rustdoc/demo-allocator-54478.rs +++ b/tests/rustdoc/demo-allocator-54478.rs @@ -40,6 +40,7 @@ //! } //! //! fn main() { +//! drop(String::from("An allocation")); //! assert!(unsafe { HIT }); //! } //! 
``` From 46176921a69310d8628d2762885c65244390f976 Mon Sep 17 00:00:00 2001 From: joboet Date: Wed, 27 Nov 2024 14:40:09 +0100 Subject: [PATCH 4/4] add comments explaining main thread identification --- library/std/src/thread/current.rs | 3 +++ library/std/src/thread/mod.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/library/std/src/thread/current.rs b/library/std/src/thread/current.rs index 052b42b4222d0..bddf91815ebfd 100644 --- a/library/std/src/thread/current.rs +++ b/library/std/src/thread/current.rs @@ -156,6 +156,9 @@ where { let current = CURRENT.get(); if current > DESTROYED { + // SAFETY: `Arc` does not contain interior mutability, so it does not + // matter that the address of the handle might be different depending + // on where this is called. unsafe { let current = ManuallyDrop::new(Thread::from_raw(current)); f(Some(&current)) diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 38b8c1ddcd3bf..eaf69ca635a20 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -1282,6 +1282,18 @@ mod thread_name_string { use thread_name_string::ThreadNameString; +/// Store the ID of the main thread. +/// +/// The thread handle for the main thread is created lazily, and this might even +/// happen pre-main. Since not every platform has a way to identify the main +/// thread when that happens – macOS's `pthread_main_np` function being a notable +/// exception – we cannot assign it the right name right then. Instead, in our +/// runtime startup code, we remember the thread ID of the main thread (through +/// this module's `set` function) and use it to identify the main thread from then +/// on. This works reliably and has the additional advantage that we can report +/// the right thread name on main even after the thread handle has been destroyed. 
+/// Note however that this also means that the name reported in pre-main functions +/// will be incorrect, but that's just something we have to live with. pub(crate) mod main_thread { cfg_if::cfg_if! { if #[cfg(target_has_atomic = "64")] { @@ -1327,21 +1339,35 @@ pub(crate) mod main_thread { } } +/// Run a function with the current thread's name. +/// +/// Modulo thread local accesses, this function is safe to call from signal +/// handlers and in similar circumstances where allocations are not possible. pub(crate) fn with_current_name<F, R>(f: F) -> R where F: FnOnce(Option<&str>) -> R, { try_with_current(|thread| { if let Some(thread) = thread { + // If there is a current thread handle, try to use the name stored + // there. if let Some(name) = &thread.inner.name { return f(Some(name.as_str())); } else if Some(thread.inner.id) == main_thread::get() { + // The main thread doesn't store its name in the handle, we must + // identify it through its ID. Since we already have the `Thread`, + // we can retrieve the ID from it instead of going through another + // thread local. return f(Some("main")); } } else if let Some(main) = main_thread::get() && let Some(id) = current::id::get() && id == main { + // The main thread doesn't always have a thread handle, we must + // identify it through its ID instead. The checks are ordered so + // that the current ID is only loaded if it is actually needed, + // since loading it from TLS might need multiple expensive accesses. return f(Some("main")); }