Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eagerly run TLS destructors to properly handle stack overflows #109858

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions library/std/src/sys/unix/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,13 @@ pub(crate) fn unix_sigpipe_attr_specified() -> bool {
// SAFETY: must be called only once during runtime cleanup.
// NOTE: this is not guaranteed to run, for example when the program aborts.
pub unsafe fn cleanup() {
// Eagerly run TLS destructors while the stack overflow handler is still
// active. Since this operation is outside any user code scope, there can
// be no outstanding references to the data. The TLS destructors would
// otherwise be run directly after this function returned, so there should
// be no observable differences in behaviour.
#[cfg(target_thread_local)]
thread_local_dtor::run_dtors();
// Must come last: the destructors above are meant to run while the stack
// overflow handler is still active.
stack_overflow::cleanup();
}

Expand Down
9 changes: 4 additions & 5 deletions library/std/src/sys/unix/stack_overflow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ mod imp {
use crate::io;
use crate::mem;
use crate::ptr;
use crate::thread;
use crate::sys_common::thread_info::current_thread;

use libc::MAP_FAILED;
#[cfg(not(all(target_os = "linux", target_env = "gnu")))]
Expand Down Expand Up @@ -89,10 +89,9 @@ mod imp {
// If the faulting address is within the guard page, then we print a
// message saying so and abort.
if guard.start <= addr && addr < guard.end {
rtprintpanic!(
"\nthread '{}' has overflowed its stack\n",
thread::current().name().unwrap_or("<unknown>")
);
let thread = current_thread();
let name = thread.as_ref().and_then(|t| t.name()).unwrap_or("<unknown>");
rtprintpanic!("\nthread '{}' has overflowed its stack\n", name);
rtabort!("stack overflow");
} else {
// Unregister ourselves by reverting back to the default behavior.
Expand Down
8 changes: 8 additions & 0 deletions library/std/src/sys/unix/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@ impl Thread {
let _handler = stack_overflow::Handler::new();
// Finally, let's run some code.
Box::from_raw(main as *mut Box<dyn FnOnce()>)();
// Eagerly run TLS destructors while the stack overflow
// handler is still active. Since this operation is outside
// any user code scope, there can be no outstanding references to the
// data. The TLS destructors would otherwise be run directly
// after this function returned, so there should be no observable
// differences in behaviour.
#[cfg(target_thread_local)]
super::thread_local_dtor::run_dtors();
}
ptr::null_mut()
}
Expand Down
91 changes: 3 additions & 88 deletions library/std/src/sys/unix/thread_local_dtor.rs
Original file line number Diff line number Diff line change
@@ -1,91 +1,6 @@
#![cfg(target_thread_local)]
#![unstable(feature = "thread_local_internals", issue = "none")]

//! Provides thread-local destructors without an associated "key", which
//! can be more efficient.

// Since what appears to be glibc 2.18 this symbol has been shipped which
// GCC and clang both use to invoke destructors in thread_local globals, so
// let's do the same!
//
// Note, however, that we run on lots of older linuxes, as well as cross
// compiling from a newer linux to an older linux, so we also have a
// fallback implementation to use as well.
#[cfg(any(target_os = "linux", target_os = "fuchsia", target_os = "redox"))]
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
    use crate::mem;
    use crate::sys_common::thread_local_dtor::register_dtor_fallback;

    // Signature the weak `__cxa_thread_atexit_impl` symbol is called with.
    type AtExitImpl = unsafe extern "C" fn(
        dtor: unsafe extern "C" fn(*mut u8),
        arg: *mut u8,
        dso_handle: *mut u8,
    ) -> libc::c_int;

    extern "C" {
        #[linkage = "extern_weak"]
        static __dso_handle: *mut u8;
        #[linkage = "extern_weak"]
        static __cxa_thread_atexit_impl: *const libc::c_void;
    }

    if __cxa_thread_atexit_impl.is_null() {
        // The weak symbol did not resolve: use the fallback implementation.
        register_dtor_fallback(t, dtor);
    } else {
        // The symbol is present, so hand the destructor and its argument to it.
        let at_exit = mem::transmute::<*const libc::c_void, AtExitImpl>(__cxa_thread_atexit_impl);
        at_exit(dtor, t, &__dso_handle as *const _ as *mut _);
    }
}

// This implementation is very similar to register_dtor_fallback in
// sys_common/thread_local.rs. The main difference is that we want to hook into
// macOS's analog of the above linux function, _tlv_atexit. OSX will run the
// registered dtors before any TLS slots get freed, and when the main thread
// exits.
//
// Unfortunately, calling _tlv_atexit while tls dtors are running is UB. The
// workaround below is to register, via _tlv_atexit, a custom DTOR list once per
// thread. thread_local dtors are pushed to the DTOR list without calling
// _tlv_atexit.
#[cfg(target_os = "macos")]
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
    use crate::cell::Cell;
    use crate::mem;
    use crate::ptr;

    extern "C" {
        fn _tlv_atexit(dtor: unsafe extern "C" fn(*mut u8), arg: *mut u8);
    }

    // Whether `run_dtors` has already been handed to `_tlv_atexit` on this thread.
    #[thread_local]
    static REGISTERED: Cell<bool> = Cell::new(false);

    // Destructors, paired with their arguments, queued on this thread.
    #[thread_local]
    static mut DTORS: Vec<(*mut u8, unsafe extern "C" fn(*mut u8))> = Vec::new();

    // Runs queued destructors batch by batch; a destructor may queue further
    // entries, so keep taking the list until a pass finds it empty.
    unsafe extern "C" fn run_dtors(_: *mut u8) {
        loop {
            let batch = mem::take(&mut DTORS);
            if batch.is_empty() {
                break;
            }
            for (object, destructor) in batch {
                destructor(object);
            }
        }
    }

    // Hook into `_tlv_atexit` only once per thread; later registrations just
    // push onto the list without calling it again.
    if !REGISTERED.get() {
        _tlv_atexit(run_dtors, ptr::null_mut());
        REGISTERED.set(true);
    }

    let queue = &mut DTORS;
    queue.push((t, dtor));
}

#[cfg(any(target_os = "vxworks", target_os = "horizon", target_os = "emscripten"))]
#[cfg_attr(target_family = "wasm", allow(unused))] // might remain unused depending on target details (e.g. wasm32-unknown-emscripten)
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
    // Forward straight to the shared fallback implementation.
    crate::sys_common::thread_local_dtor::register_dtor_fallback(t, dtor)
}
pub use crate::sys_common::thread_local_dtor::{
register_dtor_fallback as register_dtor, run_dtors,
};
46 changes: 14 additions & 32 deletions library/std/src/sys_common/thread_info.rs
Original file line number Diff line number Diff line change
@@ -1,47 +1,29 @@
#![allow(dead_code)] // stack_guard isn't used right now on all platforms
#![allow(unused_unsafe)] // thread_local with `const {}` triggers this lint

use crate::cell::RefCell;
use crate::cell::OnceCell;
use crate::sys::thread::guard::Guard;
use crate::thread::Thread;

// Per-thread record kept in the `THREAD_INFO` thread-local.
struct ThreadInfo {
// Stack guard for this thread, if one was recorded.
stack_guard: Option<Guard>,
// Handle stored for this thread.
thread: Thread,
}

thread_local! { static THREAD_INFO: RefCell<Option<ThreadInfo>> = const { RefCell::new(None) } }

impl ThreadInfo {
fn with<R, F>(f: F) -> Option<R>
where
F: FnOnce(&mut ThreadInfo) -> R,
{
THREAD_INFO
.try_with(move |thread_info| {
let mut thread_info = thread_info.borrow_mut();
let thread_info = thread_info.get_or_insert_with(|| ThreadInfo {
stack_guard: None,
thread: Thread::new(None),
});
f(thread_info)
})
.ok()
}
thread_local! {
static THREAD: OnceCell<Thread> = const { OnceCell::new() };
// Use a separate thread local for the stack guard page location.
// Since `Guard` does not implement `Drop`, this is always available
// on systems with ELF-TLS, in particular during TLS destruction.
static STACK_GUARD: OnceCell<Guard> = const { OnceCell::new() };
}

pub fn current_thread() -> Option<Thread> {
ThreadInfo::with(|info| info.thread.clone())
THREAD.try_with(|thread| thread.get_or_init(|| Thread::new(None)).clone()).ok()
}

pub fn stack_guard() -> Option<Guard> {
ThreadInfo::with(|info| info.stack_guard.clone()).and_then(|o| o)
STACK_GUARD.try_with(|guard| guard.get().cloned()).ok().flatten()
}

pub fn set(stack_guard: Option<Guard>, thread: Thread) {
THREAD_INFO.with(move |thread_info| {
let mut thread_info = thread_info.borrow_mut();
rtassert!(thread_info.is_none());
*thread_info = Some(ThreadInfo { stack_guard, thread });
});
#[allow(unreachable_patterns, unreachable_code)] // On some platforms, `Guard` is `!`.
if let Some(guard) = stack_guard {
rtassert!(STACK_GUARD.with(|s| s.set(guard)).is_ok());
}
rtassert!(THREAD.with(|t| t.set(thread)).is_ok());
}
47 changes: 27 additions & 20 deletions library/std/src/sys_common/thread_local_dtor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,34 +16,41 @@
use crate::ptr;
use crate::sys_common::thread_local_key::StaticKey;

pub unsafe fn register_dtor_fallback(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
// The fallback implementation uses a vanilla OS-based TLS key to track
// the list of destructors that need to be run for this thread. The key
// then has its own destructor which runs all the other destructors.
//
// The destructor for DTORS is a little special in that it has a `while`
// loop to continuously drain the list of registered destructors. It
// *should* be the case that this loop always terminates because we
// provide the guarantee that a TLS key cannot be set after it is
// flagged for destruction.
type List = Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>;

// The fallback implementation uses a vanilla OS-based TLS key to track
// the list of destructors that need to be run for this thread. The key
// then has its own destructor which runs all the other destructors.
//
// The destructor for DTORS is a little special in that it has a `while`
// loop to continuously drain the list of registered destructors. It
// *should* be the case that this loop always terminates because we
// provide the guarantee that a TLS key cannot be set after it is
// flagged for destruction.
static DTORS: StaticKey = StaticKey::new(Some(run_dtors_internal));

static DTORS: StaticKey = StaticKey::new(Some(run_dtors));
type List = Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>;
pub unsafe fn register_dtor_fallback(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
if DTORS.get().is_null() {
let v: Box<List> = Box::new(Vec::new());
DTORS.set(Box::into_raw(v) as *mut u8);
}
let list: &mut List = &mut *(DTORS.get() as *mut List);
list.push((t, dtor));
}

unsafe extern "C" fn run_dtors(mut ptr: *mut u8) {
while !ptr.is_null() {
let list: Box<List> = Box::from_raw(ptr as *mut List);
for (ptr, dtor) in list.into_iter() {
dtor(ptr);
}
ptr = DTORS.get();
DTORS.set(ptr::null_mut());
unsafe extern "C" fn run_dtors_internal(mut ptr: *mut u8) {
    loop {
        // Nothing (more) registered: we are done.
        if ptr.is_null() {
            break;
        }

        // Take ownership of the current list and run every entry in it.
        let batch: Box<List> = Box::from_raw(ptr as *mut List);
        for (object, destructor) in batch.into_iter() {
            destructor(object);
        }

        // Destructors may have registered new entries in the meantime; pick
        // up that list (if any) and clear the key before the next pass.
        ptr = DTORS.get();
        DTORS.set(ptr::null_mut());
    }
}

/// Runs the destructors currently stored under the `DTORS` key.
///
/// The list pointer is read and the key is reset to null *before* the list
/// is handed to `run_dtors_internal`, which takes ownership of it; a null
/// pointer (nothing registered) makes that call a no-op.
///
/// NOTE(review): callers presumably must guarantee that no references to
/// the thread-local data are still outstanding — confirm against call sites.
pub unsafe fn run_dtors() {
let ptr = DTORS.get();
DTORS.set(ptr::null_mut());
run_dtors_internal(ptr);
}
18 changes: 18 additions & 0 deletions tests/ui/abi/stack-probes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ fn main() {
if args.len() > 0 {
match &args[0][..] {
"main-recurse" => overflow_recurse(),
"main-tls-recurse" => tls_recurse(),
"child-recurse" => thread::spawn(overflow_recurse).join().unwrap(),
"child-tls-recurse" => thread::spawn(tls_recurse).join().unwrap(),
"child-frame" => overflow_frame(),
_ => panic!(),
}
Expand All @@ -42,8 +44,10 @@ fn main() {
// details
if cfg!(not(target_os = "linux")) {
assert_overflow(Command::new(&me).arg("main-recurse"));
assert_overflow(Command::new(&me).arg("main-tls-recurse"));
}
assert_overflow(Command::new(&me).arg("child-recurse"));
assert_overflow(Command::new(&me).arg("child-tls-recurse"));
assert_overflow(Command::new(&me).arg("child-frame"));
}

Expand All @@ -56,6 +60,20 @@ fn recurse(array: &MaybeUninit<[u64; 1024]>) {
recurse(&local);
}

// Touches a thread-local whose `Drop` implementation calls
// `overflow_recurse`, so the overflow is triggered from the value's destructor.
fn tls_recurse() {
    struct OverflowOnDrop;

    impl Drop for OverflowOnDrop {
        fn drop(&mut self) {
            overflow_recurse();
        }
    }

    thread_local! {
        static LOCAL: OverflowOnDrop = const { OverflowOnDrop };
    }

    // The access itself does nothing with the value.
    LOCAL.with(|_| ());
}

#[inline(never)]
fn overflow_recurse() {
recurse(&MaybeUninit::uninit());
Expand Down