Skip to content

Commit ffe818a

Browse files
committed
Auto merge of #144465 - orlp:system-alloc-tls, r=Mark-Simulacrum
Allow the global allocator to use thread-local storage and std::thread::current() Fixes #115209. Currently the thread-local storage implementation uses the `Global` allocator if it needs to allocate memory in some places. This effectively means the global allocator cannot use thread-local variables. This is a shame as an allocator is precisely one of the locations where you'd *really* want to use thread-locals. We also see that this led to hacks such as #116402, where we detect re-entrance and abort. So I've made the places where I could find allocation happening in the TLS implementation use the `System` allocator instead. I also applied this change to the storage allocated for a `Thread` handle so that it may be used care-free in the global allocator as well, for e.g. registering it to a central place or parking primitives. r? `@joboet`
2 parents 6d6a08c + db1eda9 commit ffe818a

File tree

9 files changed

+177
-42
lines changed

9 files changed

+177
-42
lines changed

library/core/src/alloc/global.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,31 @@ use crate::{cmp, ptr};
115115
/// Whether allocations happen or not is not part of the program behavior, even if it
116116
/// could be detected via an allocator that tracks allocations by printing or otherwise
117117
/// having side effects.
118+
///
119+
/// # Re-entrance
120+
///
121+
/// When implementing a global allocator one has to be careful not to create an infinitely recursive
122+
/// implementation by accident, as many constructs in the Rust standard library may allocate in
123+
/// their implementation. For example, on some platforms [`std::sync::Mutex`] may allocate, so using
124+
/// it is highly problematic in a global allocator.
125+
///
126+
/// Generally speaking for this reason one should stick to library features available through
127+
/// [`core`], and avoid using [`std`] in a global allocator. A few features from [`std`] are
128+
/// guaranteed to not use `#[global_allocator]` to allocate:
129+
///
130+
/// - [`std::thread_local`],
131+
/// - [`std::thread::current`],
132+
/// - [`std::thread::park`] and [`std::thread::Thread`]'s [`unpark`] method and
133+
/// [`Clone`] implementation.
134+
///
135+
/// [`std`]: ../../std/index.html
136+
/// [`std::sync::Mutex`]: ../../std/sync/struct.Mutex.html
137+
/// [`std::thread_local`]: ../../std/macro.thread_local.html
138+
/// [`std::thread::current`]: ../../std/thread/fn.current.html
139+
/// [`std::thread::park`]: ../../std/thread/fn.park.html
140+
/// [`std::thread::Thread`]: ../../std/thread/struct.Thread.html
141+
/// [`unpark`]: ../../std/thread/struct.Thread.html#method.unpark
142+
118143
#[stable(feature = "global_alloc", since = "1.28.0")]
119144
pub unsafe trait GlobalAlloc {
120145
/// Allocates memory as described by the given `layout`.

library/std/src/alloc.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
//!
1212
//! This attribute allows configuring the choice of global allocator.
1313
//! You can use this to implement a completely custom global allocator
14-
//! to route all default allocation requests to a custom object.
14+
//! to route all[^system-alloc] default allocation requests to a custom object.
1515
//!
1616
//! ```rust
1717
//! use std::alloc::{GlobalAlloc, System, Layout};
@@ -52,6 +52,13 @@
5252
//!
5353
//! The `#[global_allocator]` can only be used once in a crate
5454
//! or its recursive dependencies.
55+
//!
56+
//! [^system-alloc]: Note that the Rust standard library internals may still
57+
//! directly call [`System`] when necessary (for example for the runtime
58+
//! support typically required to implement a global allocator, see [re-entrance] on [`GlobalAlloc`]
59+
//! for more details).
60+
//!
61+
//! [re-entrance]: trait.GlobalAlloc.html#re-entrance
5562
5663
#![deny(unsafe_op_in_unsafe_fn)]
5764
#![stable(feature = "alloc_module", since = "1.28.0")]

library/std/src/sys/thread_local/destructors/list.rs

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,14 @@
1+
use crate::alloc::System;
12
use crate::cell::RefCell;
23
use crate::sys::thread_local::guard;
34

45
#[thread_local]
5-
static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new());
6+
static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8)), System>> =
7+
RefCell::new(Vec::new_in(System));
68

79
pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
8-
let Ok(mut dtors) = DTORS.try_borrow_mut() else {
9-
// This point can only be reached if the global allocator calls this
10-
// function again.
11-
// FIXME: maybe use the system allocator instead?
12-
rtabort!("the global allocator may not use TLS with destructors");
13-
};
14-
10+
let Ok(mut dtors) = DTORS.try_borrow_mut() else { rtabort!("unreachable") };
1511
guard::enable();
16-
1712
dtors.push((t, dtor));
1813
}
1914

@@ -36,7 +31,7 @@ pub unsafe fn run() {
3631
}
3732
None => {
3833
// Free the list memory.
39-
*dtors = Vec::new();
34+
*dtors = Vec::new_in(System);
4035
break;
4136
}
4237
}

library/std/src/sys/thread_local/key/xous.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838

3939
use core::arch::asm;
4040

41+
use crate::alloc::System;
4142
use crate::mem::ManuallyDrop;
4243
use crate::os::xous::ffi::{MemoryFlags, map_memory, unmap_memory};
4344
use crate::ptr;
@@ -151,7 +152,10 @@ struct Node {
151152
}
152153

153154
unsafe fn register_dtor(key: Key, dtor: Dtor) {
154-
let mut node = ManuallyDrop::new(Box::new(Node { key, dtor, next: ptr::null_mut() }));
155+
// We use the System allocator here to avoid interfering with a potential
156+
// Global allocator using thread-local storage.
157+
let mut node =
158+
ManuallyDrop::new(Box::new_in(Node { key, dtor, next: ptr::null_mut() }, System));
155159

156160
#[allow(unused_unsafe)]
157161
let mut head = unsafe { DTORS.load(Acquire) };

library/std/src/sys/thread_local/os.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use super::key::{Key, LazyKey, get, set};
22
use super::{abort_on_dtor_unwind, guard};
3+
use crate::alloc::System;
34
use crate::cell::Cell;
45
use crate::marker::PhantomData;
56
use crate::ptr;
@@ -95,8 +96,11 @@ impl<T: 'static> Storage<T> {
9596
return ptr::null();
9697
}
9798

98-
let value = Box::new(Value { value: i.and_then(Option::take).unwrap_or_else(f), key });
99-
let ptr = Box::into_raw(value);
99+
// We use the System allocator here to avoid interfering with a potential
100+
// Global allocator using thread-local storage.
101+
let value =
102+
Box::new_in(Value { value: i.and_then(Option::take).unwrap_or_else(f), key }, System);
103+
let ptr = Box::into_raw_with_allocator(value).0;
100104

101105
// SAFETY:
102106
// * key came from a `LazyKey` and is thus correct.
@@ -114,7 +118,7 @@ impl<T: 'static> Storage<T> {
114118
// initializer has already returned and the next scope only starts
115119
// after we return the pointer. Therefore, there can be no references
116120
// to the old value.
117-
drop(unsafe { Box::from_raw(old) });
121+
drop(unsafe { Box::from_raw_in(old, System) });
118122
}
119123

120124
// SAFETY: We just created this value above.
@@ -133,7 +137,7 @@ unsafe extern "C" fn destroy_value<T: 'static>(ptr: *mut u8) {
133137
// Note that to prevent an infinite loop we reset it back to null right
134138
// before we return from the destructor ourselves.
135139
abort_on_dtor_unwind(|| {
136-
let ptr = unsafe { Box::from_raw(ptr as *mut Value<T>) };
140+
let ptr = unsafe { Box::from_raw_in(ptr as *mut Value<T>, System) };
137141
let key = ptr.key;
138142
// SAFETY: `key` is the TLS key `ptr` was stored under.
139143
unsafe { set(key, ptr::without_provenance_mut(1)) };

library/std/src/thread/current.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -250,15 +250,16 @@ fn init_current(current: *mut ()) -> Thread {
250250
// extra TLS write above shouldn't matter. The alternative is nearly always
251251
// a stack overflow.
252252

253-
// If you came across this message, contact the author of your allocator.
254-
// If you are said author: A surprising amount of functions inside the
255-
// standard library (e.g. `Mutex`, `thread_local!`, `File` when using long
256-
// paths, even `panic!` when using unwinding), need memory allocation, so
257-
// you'll get circular dependencies all over the place when using them.
258-
// I (joboet) highly recommend using only APIs from core in your allocator
259-
// and implementing your own system abstractions. Still, if you feel that
260-
// a particular API should be entirely allocation-free, feel free to open
261-
// an issue on the Rust repository, we'll see what we can do.
253+
// If you came across this message, contact the author of your
254+
// allocator. If you are said author: A surprising amount of functions
255+
// inside the standard library (e.g. `Mutex`, `File` when using long
256+
// paths, even `panic!` when using unwinding), need memory allocation,
257+
// so you'll get circular dependencies all over the place when using
258+
// them. I (joboet) highly recommend using only APIs from core in your
259+
// allocator and implementing your own system abstractions. Still, if
260+
// you feel that a particular API should be entirely allocation-free,
261+
// feel free to open an issue on the Rust repository, we'll see what we
262+
// can do.
262263
rtabort!(
263264
"\n\
264265
Attempted to access thread-local data while allocating said data.\n\

library/std/src/thread/local.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,17 @@ use crate::fmt;
2424
/// [`with`]) within a thread, and values that implement [`Drop`] get
2525
/// destructed when a thread exits. Some platform-specific caveats apply, which
2626
/// are explained below.
27-
/// Note that if the destructor panics, the whole process will be [aborted].
27+
/// Note that, should the destructor panic, the whole process will be [aborted].
28+
/// On platforms where initialization requires memory allocation, this is
29+
/// performed directly through [`System`], allowing the [global allocator]
30+
/// to make use of thread local storage.
2831
///
2932
/// A `LocalKey`'s initializer cannot recursively depend on itself. Using a
3033
/// `LocalKey` in this way may cause panics, aborts, or infinite recursion on
3134
/// the first call to `with`.
3235
///
36+
/// [`System`]: crate::alloc::System
37+
/// [global allocator]: crate::alloc
3338
/// [aborted]: crate::process::abort
3439
///
3540
/// # Single-thread Synchronization

library/std/src/thread/mod.rs

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@
158158
#[cfg(all(test, not(any(target_os = "emscripten", target_os = "wasi"))))]
159159
mod tests;
160160

161+
use crate::alloc::System;
161162
use crate::any::Any;
162163
use crate::cell::UnsafeCell;
163164
use crate::ffi::CStr;
@@ -1231,21 +1232,45 @@ impl ThreadId {
12311232
}
12321233
}
12331234
_ => {
1234-
use crate::sync::{Mutex, PoisonError};
1235-
1236-
static COUNTER: Mutex<u64> = Mutex::new(0);
1235+
use crate::cell::SyncUnsafeCell;
1236+
use crate::hint::spin_loop;
1237+
use crate::sync::atomic::{Atomic, AtomicBool};
1238+
use crate::thread::yield_now;
1239+
1240+
// If we don't have a 64-bit atomic we use a small spinlock. We don't use Mutex
1241+
// here as we might be trying to get the current thread id in the global allocator,
1242+
// and on some platforms Mutex requires allocation.
1243+
static COUNTER_LOCKED: Atomic<bool> = AtomicBool::new(false);
1244+
static COUNTER: SyncUnsafeCell<u64> = SyncUnsafeCell::new(0);
1245+
1246+
// Acquire lock.
1247+
let mut spin = 0;
1248+
while COUNTER_LOCKED.compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed).is_err() {
1249+
if spin <= 3 {
1250+
for _ in 0..(1 << spin) {
1251+
spin_loop();
1252+
}
1253+
} else {
1254+
yield_now();
1255+
}
1256+
spin += 1;
1257+
}
12371258

1238-
let mut counter = COUNTER.lock().unwrap_or_else(PoisonError::into_inner);
1239-
let Some(id) = counter.checked_add(1) else {
1240-
// in case the panic handler ends up calling `ThreadId::new()`,
1241-
// avoid reentrant lock acquire.
1242-
drop(counter);
1243-
exhausted();
1259+
let id;
1260+
// SAFETY: we have an exclusive lock on the counter.
1261+
unsafe {
1262+
id = (*COUNTER.get()).saturating_add(1);
1263+
(*COUNTER.get()) = id;
12441264
};
12451265

1246-
*counter = id;
1247-
drop(counter);
1248-
ThreadId(NonZero::new(id).unwrap())
1266+
// Release the lock.
1267+
COUNTER_LOCKED.store(false, Ordering::Release);
1268+
1269+
if id == u64::MAX {
1270+
exhausted()
1271+
} else {
1272+
ThreadId(NonZero::new(id).unwrap())
1273+
}
12491274
}
12501275
}
12511276
}
@@ -1439,7 +1464,10 @@ impl Inner {
14391464
///
14401465
/// [`thread::current`]: current::current
14411466
pub struct Thread {
1442-
inner: Pin<Arc<Inner>>,
1467+
// We use the System allocator such that creating or dropping this handle
1468+
// does not interfere with a potential Global allocator using thread-local
1469+
// storage.
1470+
inner: Pin<Arc<Inner, System>>,
14431471
}
14441472

14451473
impl Thread {
@@ -1452,7 +1480,7 @@ impl Thread {
14521480
// SAFETY: We pin the Arc immediately after creation, so its address never
14531481
// changes.
14541482
let inner = unsafe {
1455-
let mut arc = Arc::<Inner>::new_uninit();
1483+
let mut arc = Arc::<Inner, _>::new_uninit_in(System);
14561484
let ptr = Arc::get_mut_unchecked(&mut arc).as_mut_ptr();
14571485
(&raw mut (*ptr).name).write(name);
14581486
(&raw mut (*ptr).id).write(id);
@@ -1610,7 +1638,7 @@ impl Thread {
16101638
pub fn into_raw(self) -> *const () {
16111639
// Safety: We only expose an opaque pointer, which maintains the `Pin` invariant.
16121640
let inner = unsafe { Pin::into_inner_unchecked(self.inner) };
1613-
Arc::into_raw(inner) as *const ()
1641+
Arc::into_raw_with_allocator(inner).0 as *const ()
16141642
}
16151643

16161644
/// Constructs a `Thread` from a raw pointer.
@@ -1632,7 +1660,9 @@ impl Thread {
16321660
#[unstable(feature = "thread_raw", issue = "97523")]
16331661
pub unsafe fn from_raw(ptr: *const ()) -> Thread {
16341662
// Safety: Upheld by caller.
1635-
unsafe { Thread { inner: Pin::new_unchecked(Arc::from_raw(ptr as *const Inner)) } }
1663+
unsafe {
1664+
Thread { inner: Pin::new_unchecked(Arc::from_raw_in(ptr as *const Inner, System)) }
1665+
}
16361666
}
16371667

16381668
fn cname(&self) -> Option<&CStr> {
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
//@ run-pass
2+
//@ needs-threads
3+
4+
use std::alloc::{GlobalAlloc, Layout, System};
5+
use std::thread::Thread;
6+
use std::sync::atomic::{AtomicUsize, AtomicBool, Ordering};
7+
8+
static GLOBAL: AtomicUsize = AtomicUsize::new(0);
9+
10+
struct Local(Thread);
11+
12+
thread_local! {
13+
static LOCAL: Local = {
14+
GLOBAL.fetch_or(1, Ordering::Relaxed);
15+
Local(std::thread::current())
16+
};
17+
}
18+
19+
impl Drop for Local {
20+
fn drop(&mut self) {
21+
GLOBAL.fetch_or(2, Ordering::Relaxed);
22+
}
23+
}
24+
25+
static SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS: AtomicBool = AtomicBool::new(false);
26+
27+
#[global_allocator]
28+
static ALLOC: Alloc = Alloc;
29+
struct Alloc;
30+
31+
unsafe impl GlobalAlloc for Alloc {
32+
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
33+
// Make sure we aren't re-entrant.
34+
assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed));
35+
SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed);
36+
LOCAL.with(|local| {
37+
assert!(local.0.id() == std::thread::current().id());
38+
});
39+
let ret = unsafe { System.alloc(layout) };
40+
SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed);
41+
ret
42+
}
43+
44+
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
45+
// Make sure we aren't re-entrant.
46+
assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed));
47+
SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed);
48+
LOCAL.with(|local| {
49+
assert!(local.0.id() == std::thread::current().id());
50+
});
51+
unsafe { System.dealloc(ptr, layout) }
52+
SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed);
53+
}
54+
}
55+
56+
fn main() {
57+
std::thread::spawn(|| {
58+
std::hint::black_box(vec![1, 2]);
59+
assert!(GLOBAL.load(Ordering::Relaxed) == 1);
60+
})
61+
.join()
62+
.unwrap();
63+
assert!(GLOBAL.load(Ordering::Relaxed) == 3);
64+
}

0 commit comments

Comments
 (0)