From d7692cc18fc58c18040cf38ba7023ec6cd44dd2a Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 25 Jul 2025 22:31:44 +0200 Subject: [PATCH 1/3] Use System allocator for thread-local storage --- .../src/sys/thread_local/destructors/list.rs | 15 ++--- library/std/src/sys/thread_local/key/xous.rs | 6 +- library/std/src/sys/thread_local/os.rs | 12 ++-- library/std/src/thread/mod.rs | 14 ++-- .../threads-sendsync/tls-in-global-alloc.rs | 64 +++++++++++++++++++ 5 files changed, 92 insertions(+), 19 deletions(-) create mode 100644 tests/ui/threads-sendsync/tls-in-global-alloc.rs diff --git a/library/std/src/sys/thread_local/destructors/list.rs b/library/std/src/sys/thread_local/destructors/list.rs index b9d5214c438d2..606694cc78d79 100644 --- a/library/std/src/sys/thread_local/destructors/list.rs +++ b/library/std/src/sys/thread_local/destructors/list.rs @@ -1,19 +1,14 @@ +use crate::alloc::System; use crate::cell::RefCell; use crate::sys::thread_local::guard; #[thread_local] -static DTORS: RefCell> = RefCell::new(Vec::new()); +static DTORS: RefCell> = + RefCell::new(Vec::new_in(System)); pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - let Ok(mut dtors) = DTORS.try_borrow_mut() else { - // This point can only be reached if the global allocator calls this - // function again. - // FIXME: maybe use the system allocator instead? - rtabort!("the global allocator may not use TLS with destructors"); - }; - + let Ok(mut dtors) = DTORS.try_borrow_mut() else { rtabort!("unreachable") }; guard::enable(); - dtors.push((t, dtor)); } @@ -36,7 +31,7 @@ pub unsafe fn run() { } None => { // Free the list memory. - *dtors = Vec::new(); + *dtors = Vec::new_in(System); break; } } diff --git a/library/std/src/sys/thread_local/key/xous.rs b/library/std/src/sys/thread_local/key/xous.rs index a27cec5ca1a60..529178f34757b 100644 --- a/library/std/src/sys/thread_local/key/xous.rs +++ b/library/std/src/sys/thread_local/key/xous.rs @@ -38,6 +38,7 @@ use core::arch::asm; +use crate::alloc::System; use crate::mem::ManuallyDrop; use crate::os::xous::ffi::{MemoryFlags, map_memory, unmap_memory}; use crate::ptr; @@ -151,7 +152,10 @@ struct Node { } unsafe fn register_dtor(key: Key, dtor: Dtor) { - let mut node = ManuallyDrop::new(Box::new(Node { key, dtor, next: ptr::null_mut() })); + // We use the System allocator here to avoid interfering with a potential + // Global allocator using thread-local storage. + let mut node = + ManuallyDrop::new(Box::new_in(Node { key, dtor, next: ptr::null_mut() }, System)); #[allow(unused_unsafe)] let mut head = unsafe { DTORS.load(Acquire) }; diff --git a/library/std/src/sys/thread_local/os.rs b/library/std/src/sys/thread_local/os.rs index fe6af27db3a17..0fc24ed5158f5 100644 --- a/library/std/src/sys/thread_local/os.rs +++ b/library/std/src/sys/thread_local/os.rs @@ -1,5 +1,6 @@ use super::key::{Key, LazyKey, get, set}; use super::{abort_on_dtor_unwind, guard}; +use crate::alloc::System; use crate::cell::Cell; use crate::marker::PhantomData; use crate::ptr; @@ -95,8 +96,11 @@ impl Storage { return ptr::null(); } - let value = Box::new(Value { value: i.and_then(Option::take).unwrap_or_else(f), key }); - let ptr = Box::into_raw(value); + // We use the System allocator here to avoid interfering with a potential + // Global allocator using thread-local storage. + let value = + Box::new_in(Value { value: i.and_then(Option::take).unwrap_or_else(f), key }, System); + let ptr = Box::into_raw_with_allocator(value).0; // SAFETY: // * key came from a `LazyKey` and is thus correct. @@ -114,7 +118,7 @@ impl Storage { // initializer has already returned and the next scope only starts // after we return the pointer. Therefore, there can be no references // to the old value. - drop(unsafe { Box::from_raw(old) }); + drop(unsafe { Box::from_raw_in(old, System) }); } // SAFETY: We just created this value above. @@ -133,7 +137,7 @@ unsafe extern "C" fn destroy_value(ptr: *mut u8) { // Note that to prevent an infinite loop we reset it back to null right // before we return from the destructor ourselves. abort_on_dtor_unwind(|| { - let ptr = unsafe { Box::from_raw(ptr as *mut Value) }; + let ptr = unsafe { Box::from_raw_in(ptr as *mut Value, System) }; let key = ptr.key; // SAFETY: `key` is the TLS key `ptr` was stored under. unsafe { set(key, ptr::without_provenance_mut(1)) }; diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index b6059c28cec7d..8c6d357d669c3 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -158,6 +158,7 @@ #[cfg(all(test, not(any(target_os = "emscripten", target_os = "wasi"))))] mod tests; +use crate::alloc::System; use crate::any::Any; use crate::cell::UnsafeCell; use crate::ffi::CStr; @@ -1439,7 +1440,10 @@ impl Inner { /// /// [`thread::current`]: current::current pub struct Thread { - inner: Pin>, + // We use the System allocator such that creating or dropping this handle + // does not interfere with a potential Global allocator using thread-local + // storage. + inner: Pin>, } impl Thread { @@ -1452,7 +1456,7 @@ impl Thread { // SAFETY: We pin the Arc immediately after creation, so its address never // changes. let inner = unsafe { - let mut arc = Arc::::new_uninit(); + let mut arc = Arc::::new_uninit_in(System); let ptr = Arc::get_mut_unchecked(&mut arc).as_mut_ptr(); (&raw mut (*ptr).name).write(name); (&raw mut (*ptr).id).write(id); @@ -1610,7 +1614,7 @@ impl Thread { pub fn into_raw(self) -> *const () { // Safety: We only expose an opaque pointer, which maintains the `Pin` invariant. let inner = unsafe { Pin::into_inner_unchecked(self.inner) }; - Arc::into_raw(inner) as *const () + Arc::into_raw_with_allocator(inner).0 as *const () } /// Constructs a `Thread` from a raw pointer. @@ -1632,7 +1636,9 @@ impl Thread { #[unstable(feature = "thread_raw", issue = "97523")] pub unsafe fn from_raw(ptr: *const ()) -> Thread { // Safety: Upheld by caller. - unsafe { Thread { inner: Pin::new_unchecked(Arc::from_raw(ptr as *const Inner)) } } + unsafe { + Thread { inner: Pin::new_unchecked(Arc::from_raw_in(ptr as *const Inner, System)) } + } } fn cname(&self) -> Option<&CStr> { diff --git a/tests/ui/threads-sendsync/tls-in-global-alloc.rs b/tests/ui/threads-sendsync/tls-in-global-alloc.rs new file mode 100644 index 0000000000000..82a96ce9a6c3a --- /dev/null +++ b/tests/ui/threads-sendsync/tls-in-global-alloc.rs @@ -0,0 +1,64 @@ +//@ run-pass +//@ needs-threads + +use std::alloc::{GlobalAlloc, Layout, System}; +use std::thread::Thread; +use std::sync::atomic::{AtomicUsize, AtomicBool, Ordering}; + +static GLOBAL: AtomicUsize = AtomicUsize::new(0); + +struct Local(Thread); + +thread_local! { + static LOCAL: Local = { + GLOBAL.fetch_or(1, Ordering::Relaxed); + Local(std::thread::current()) + }; +} + +impl Drop for Local { + fn drop(&mut self) { + GLOBAL.fetch_or(2, Ordering::Relaxed); + } +} + +static SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS: AtomicBool = AtomicBool::new(false); + +#[global_allocator] +static ALLOC: Alloc = Alloc; +struct Alloc; + +unsafe impl GlobalAlloc for Alloc { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + // Make sure we aren't re-entrant. + assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed)); + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed); + LOCAL.with(|local| { + assert!(local.0.id() == std::thread::current().id()); + }); + let ret = unsafe { System.alloc(layout) }; + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed); + ret + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + // Make sure we aren't re-entrant. + assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed)); + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed); + LOCAL.with(|local| { + assert!(local.0.id() == std::thread::current().id()); + }); + unsafe { System.dealloc(ptr, layout) } + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed); + } +} + +fn main() { + std::thread::spawn(|| { + std::hint::black_box(vec![1, 2]); + assert!(GLOBAL.load(Ordering::Relaxed) == 1); + }) + .join() + .unwrap(); + assert!(GLOBAL.load(Ordering::Relaxed) == 3); +} From 92a4f19ad19082dd4bd453e762e5977a97d74e41 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Sat, 26 Jul 2025 14:30:00 +0200 Subject: [PATCH 2/3] Use spinlock for ThreadId if 64-bit atomic unavailable --- library/std/src/thread/mod.rs | 48 ++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 8c6d357d669c3..57f2b4f979b37 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -1232,21 +1232,45 @@ impl ThreadId { } } _ => { - use crate::sync::{Mutex, PoisonError}; - - static COUNTER: Mutex = Mutex::new(0); + use crate::cell::SyncUnsafeCell; + use crate::hint::spin_loop; + use crate::sync::atomic::{Atomic, AtomicBool}; + use crate::thread::yield_now; + + // If we don't have a 64-bit atomic we use a small spinlock. We don't use Mutex + // here as we might be trying to get the current thread id in the global allocator, + // and on some platforms Mutex requires allocation. + static COUNTER_LOCKED: Atomic = AtomicBool::new(false); + static COUNTER: SyncUnsafeCell = SyncUnsafeCell::new(0); + + // Acquire lock. + let mut spin = 0; + while COUNTER_LOCKED.compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed).is_err() { + if spin <= 3 { + for _ in 0..(1 << spin) { + spin_loop(); + } + } else { + yield_now(); + } + spin += 1; + } - let mut counter = COUNTER.lock().unwrap_or_else(PoisonError::into_inner); - let Some(id) = counter.checked_add(1) else { - // in case the panic handler ends up calling `ThreadId::new()`, - // avoid reentrant lock acquire. - drop(counter); - exhausted(); + let id; + // SAFETY: we have an exclusive lock on the counter. + unsafe { + id = (*COUNTER.get()).saturating_add(1); + (*COUNTER.get()) = id; }; - *counter = id; - drop(counter); - ThreadId(NonZero::new(id).unwrap()) + // Release the lock. + COUNTER_LOCKED.store(false, Ordering::Release); + + if id == u64::MAX { + exhausted() + } else { + ThreadId(NonZero::new(id).unwrap()) + } } } } From db1eda9892bd5c50026f6be40af354108aad23b3 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Tue, 29 Jul 2025 19:43:29 +0200 Subject: [PATCH 3/3] Add documentation guaranteeing global allocator use of TLS Remove outdated part of comment claiming thread_local re-enters global allocator Fix typo in doc comment Add comments for guarantees given and footnote that System may still be called Revise mention of using the global allocator Allow for the possibility that the global allocator is the system allocator. Co-authored-by: Mark Rousskov --- library/core/src/alloc/global.rs | 25 +++++++++++++++++++++++++ library/std/src/alloc.rs | 9 ++++++++- library/std/src/thread/current.rs | 19 ++++++++++--------- library/std/src/thread/local.rs | 7 ++++++- 4 files changed, 49 insertions(+), 11 deletions(-) diff --git a/library/core/src/alloc/global.rs b/library/core/src/alloc/global.rs index 5bf6f143b4f82..a572ba12baa4b 100644 --- a/library/core/src/alloc/global.rs +++ b/library/core/src/alloc/global.rs @@ -115,6 +115,31 @@ use crate::{cmp, ptr}; /// Whether allocations happen or not is not part of the program behavior, even if it /// could be detected via an allocator that tracks allocations by printing or otherwise /// having side effects. +/// +/// # Re-entrance +/// +/// When implementing a global allocator one has to be careful not to create an infinitely recursive +/// implementation by accident, as many constructs in the Rust standard library may allocate in +/// their implementation. For example, on some platforms [`std::sync::Mutex`] may allocate, so using +/// it is highly problematic in a global allocator. +/// +/// Generally speaking for this reason one should stick to library features available through +/// [`core`], and avoid using [`std`] in a global allocator. A few features from [`std`] are +/// guaranteed to not use `#[global_allocator]` to allocate: +/// +/// - [`std::thread_local`], +/// - [`std::thread::current`], +/// - [`std::thread::park`] and [`std::thread::Thread`]'s [`unpark`] method and +/// [`Clone`] implementation. +/// +/// [`std`]: ../../std/index.html +/// [`std::sync::Mutex`]: ../../std/sync/struct.Mutex.html +/// [`std::thread_local`]: ../../std/macro.thread_local.html +/// [`std::thread::current`]: ../../std/thread/fn.current.html +/// [`std::thread::park`]: ../../std/thread/fn.park.html +/// [`std::thread::Thread`]: ../../std/thread/struct.Thread.html +/// [`unpark`]: ../../std/thread/struct.Thread.html#method.unpark + #[stable(feature = "global_alloc", since = "1.28.0")] pub unsafe trait GlobalAlloc { /// Allocates memory as described by the given `layout`. diff --git a/library/std/src/alloc.rs b/library/std/src/alloc.rs index 1d61630269ac3..f903a61c2a5d0 100644 --- a/library/std/src/alloc.rs +++ b/library/std/src/alloc.rs @@ -11,7 +11,7 @@ //! //! This attribute allows configuring the choice of global allocator. //! You can use this to implement a completely custom global allocator -//! to route all default allocation requests to a custom object. +//! to route all[^system-alloc] default allocation requests to a custom object. //! //! ```rust //! use std::alloc::{GlobalAlloc, System, Layout}; @@ -52,6 +52,13 @@ //! //! The `#[global_allocator]` can only be used once in a crate //! or its recursive dependencies. +//! +//! [^system-alloc]: Note that the Rust standard library internals may still +//! directly call [`System`] when necessary (for example for the runtime +//! support typically required to implement a global allocator, see [re-entrance] on [`GlobalAlloc`] +//! for more details). +//! +//! [re-entrance]: trait.GlobalAlloc.html#re-entrance #![deny(unsafe_op_in_unsafe_fn)] #![stable(feature = "alloc_module", since = "1.28.0")] diff --git a/library/std/src/thread/current.rs b/library/std/src/thread/current.rs index 7da1621da45ce..b666fe286d4d6 100644 --- a/library/std/src/thread/current.rs +++ b/library/std/src/thread/current.rs @@ -250,15 +250,16 @@ fn init_current(current: *mut ()) -> Thread { // extra TLS write above shouldn't matter. The alternative is nearly always // a stack overflow. - // If you came across this message, contact the author of your allocator. - // If you are said author: A surprising amount of functions inside the - // standard library (e.g. `Mutex`, `thread_local!`, `File` when using long - // paths, even `panic!` when using unwinding), need memory allocation, so - // you'll get circular dependencies all over the place when using them. - // I (joboet) highly recommend using only APIs from core in your allocator - // and implementing your own system abstractions. Still, if you feel that - // a particular API should be entirely allocation-free, feel free to open - // an issue on the Rust repository, we'll see what we can do. + // If you came across this message, contact the author of your + // allocator. If you are said author: A surprising amount of functions + // inside the standard library (e.g. `Mutex`, `File` when using long + // paths, even `panic!` when using unwinding), need memory allocation, + // so you'll get circular dependencies all over the place when using + // them. I (joboet) highly recommend using only APIs from core in your + // allocator and implementing your own system abstractions. Still, if + // you feel that a particular API should be entirely allocation-free, + // feel free to open an issue on the Rust repository, we'll see what we + // can do. rtabort!( "\n\ Attempted to access thread-local data while allocating said data.\n\ diff --git a/library/std/src/thread/local.rs b/library/std/src/thread/local.rs index 797feeb2bbb5f..d29a6059b5ca0 100644 --- a/library/std/src/thread/local.rs +++ b/library/std/src/thread/local.rs @@ -24,12 +24,17 @@ use crate::fmt; /// [`with`]) within a thread, and values that implement [`Drop`] get /// destructed when a thread exits. Some platform-specific caveats apply, which /// are explained below. -/// Note that if the destructor panics, the whole process will be [aborted]. +/// Note that, should the destructor panic, the whole process will be [aborted]. +/// On platforms where initialization requires memory allocation, this is +/// performed directly through [`System`], allowing the [global allocator] +/// to make use of thread local storage. /// /// A `LocalKey`'s initializer cannot recursively depend on itself. Using a /// `LocalKey` in this way may cause panics, aborts, or infinite recursion on /// the first call to `with`. /// +/// [`System`]: crate::alloc::System +/// [global allocator]: crate::alloc /// [aborted]: crate::process::abort /// /// # Single-thread Synchronization