
Commit d500a34

Committed Mar 11, 2025 (2 parents: 9fb94b3 + de49a82)

Auto merge of #136401 - Mark-Simulacrum:lockfree-as-str, r=<try>

Lazy-chunk Symbol interner

This fixes the unsoundness in `Symbol::as_str`, while also making it a bit faster (replacing locking with a single atomic load).
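The design in one picture: writers still take a lock to deduplicate, but the index-to-string table is append-only, so readers resolve a Symbol with atomic loads and no lock, and the returned reference is genuinely `'static`. A minimal sketch of that split, assuming `elsa`'s `LockFreeFrozenVec` push/get API (the same one the diff below uses) and substituting `Box::leak` for the patch's real chunked arena:

use std::collections::HashMap;
use std::sync::Mutex;

use elsa::sync::LockFreeFrozenVec;

// Minimal sketch, not the committed code: `Box::leak` stands in for the
// patch's zero-initialized 4 MiB chunk arena, and a plain `HashMap` stands
// in for the `hashbrown::HashTable<Symbol>` keyed by `FxHasher`.
#[derive(Default)]
struct SketchInterner {
    // Writers still serialize on a lock to deduplicate strings...
    dedup: Mutex<HashMap<&'static str, u32>>,
    // ...but index -> string lookup is append-only and lock-free to read.
    strings: LockFreeFrozenVec<&'static str>,
}

impl SketchInterner {
    fn intern(&self, s: &str) -> u32 {
        let mut dedup = self.dedup.lock().unwrap();
        if let Some(&idx) = dedup.get(s) {
            return idx;
        }
        let leaked: &'static str = Box::leak(s.to_owned().into_boxed_str());
        let idx = u32::try_from(self.strings.push(leaked)).unwrap();
        dedup.insert(leaked, idx);
        idx
    }

    // The `as_str` analogue: no lock, just the atomic loads inside
    // `LockFreeFrozenVec::get`, and the result honestly lives forever.
    fn get(&self, idx: u32) -> &'static str {
        self.strings.get(idx as usize).unwrap()
    }
}

The committed version avoids leaking one allocation per string by bump-allocating into lazily created 4 MiB chunks, and reserves the first 2^16 indices for the pre-filled symbol table.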

File tree: 7 files changed (+185 -51 lines)
 

‎Cargo.lock

+3

@@ -3492,6 +3492,7 @@ dependencies = [
  "either",
  "elsa",
  "ena",
+ "hashbrown 0.15.2",
  "indexmap",
  "jobserver",
  "libc",
@@ -4411,6 +4412,8 @@ version = "0.0.0"
 dependencies = [
  "blake3",
  "derive-where",
+ "elsa",
+ "hashbrown 0.15.2",
  "indexmap",
  "itoa",
  "md-5",

‎compiler/rustc_data_structures/Cargo.toml

+1

@@ -10,6 +10,7 @@ bitflags = "2.4.1"
 either = "1.0"
 elsa = "1.11.0"
 ena = "0.14.3"
+hashbrown = { version = "0.15.2", default-features = false }
 indexmap = "2.4.0"
 jobserver_crate = { version = "0.1.28", package = "jobserver" }
 measureme = "11"

‎compiler/rustc_data_structures/src/marker.rs

+2

@@ -70,6 +70,7 @@ impl_dyn_send!(
     [std::sync::LazyLock<T, F> where T: DynSend, F: DynSend]
     [std::collections::HashSet<K, S> where K: DynSend, S: DynSend]
     [std::collections::HashMap<K, V, S> where K: DynSend, V: DynSend, S: DynSend]
+    [hashbrown::HashTable<T> where T: DynSend]
     [std::collections::BTreeMap<K, V, A> where K: DynSend, V: DynSend, A: std::alloc::Allocator + Clone + DynSend]
     [Vec<T, A> where T: DynSend, A: std::alloc::Allocator + DynSend]
     [Box<T, A> where T: ?Sized + DynSend, A: std::alloc::Allocator + DynSend]
@@ -144,6 +145,7 @@ impl_dyn_sync!(
     [std::sync::LazyLock<T, F> where T: DynSend + DynSync, F: DynSend]
     [std::collections::HashSet<K, S> where K: DynSync, S: DynSync]
     [std::collections::HashMap<K, V, S> where K: DynSync, V: DynSync, S: DynSync]
+    [hashbrown::HashTable<T> where T: DynSync]
     [std::collections::BTreeMap<K, V, A> where K: DynSync, V: DynSync, A: std::alloc::Allocator + Clone + DynSync]
     [Vec<T, A> where T: DynSync, A: std::alloc::Allocator + DynSync]
     [Box<T, A> where T: ?Sized + DynSync, A: std::alloc::Allocator + DynSync]
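These two additions exist because the reworked interner keeps a `hashbrown::HashTable<Symbol>` inside a `Lock`, and rustc's parallel front-end only lets types cross or be shared between threads if they appear on the `DynSend`/`DynSync` allow-lists. A standalone sketch of the pattern the macros expand to, with simplified stand-in traits rather than rustc's real ones:

// Simplified stand-ins for rustc's marker traits (the real DynSend/DynSync
// in rustc_data_structures::marker carry more machinery). Requires the
// hashbrown crate as a dependency.
unsafe trait DynSend {}
unsafe trait DynSync {}

// Roughly what the two added allow-list entries expand to: a hash table is
// safe to send to / share with another thread exactly when its elements are.
unsafe impl<T> DynSend for hashbrown::HashTable<T> where T: DynSend {}
unsafe impl<T> DynSync for hashbrown::HashTable<T> where T: DynSync {}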

‎compiler/rustc_span/Cargo.toml

+2

@@ -7,6 +7,8 @@ edition = "2024"
 # tidy-alphabetical-start
 blake3 = "1.5.2"
 derive-where = "1.2.7"
+elsa = "1.11.0"
+hashbrown = { version = "0.15.2", default-features = false }
 indexmap = { version = "2.0.0" }
 itoa = "1.0"
 md5 = { package = "md-5", version = "0.10.0" }

‎compiler/rustc_span/src/lib.rs

+3

@@ -27,11 +27,14 @@
 #![feature(let_chains)]
 #![feature(map_try_insert)]
 #![feature(negative_impls)]
+#![feature(new_zeroed_alloc)]
 #![feature(read_buf)]
 #![feature(round_char_boundary)]
 #![feature(rustc_attrs)]
 #![feature(rustdoc_internals)]
 #![feature(slice_as_chunks)]
+#![feature(str_from_raw_parts)]
+#![feature(sync_unsafe_cell)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end

‎compiler/rustc_span/src/symbol.rs

+167 -46

@@ -2,11 +2,13 @@
 //! allows bidirectional lookup; i.e., given a value, one can easily find the
 //! type, and vice versa.

-use std::hash::{Hash, Hasher};
+use std::cell::SyncUnsafeCell;
+use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};
+use std::sync::LazyLock;
+use std::sync::atomic::{AtomicU32, Ordering};
 use std::{fmt, str};

-use rustc_arena::DroplessArena;
-use rustc_data_structures::fx::FxIndexSet;
+use rustc_data_structures::fx::FxHasher;
 use rustc_data_structures::stable_hasher::{
     HashStable, StableCompare, StableHasher, ToStableHashKey,
 };
@@ -2497,18 +2499,9 @@ impl Symbol {
         with_session_globals(|session_globals| session_globals.symbol_interner.intern(string))
     }

-    /// Access the underlying string. This is a slowish operation because it
-    /// requires locking the symbol interner.
-    ///
-    /// Note that the lifetime of the return value is a lie. It's not the same
-    /// as `&self`, but actually tied to the lifetime of the underlying
-    /// interner. Interners are long-lived, and there are very few of them, and
-    /// this function is typically used for short-lived things, so in practice
-    /// it works out ok.
+    /// Access the underlying string.
     pub fn as_str(&self) -> &str {
-        with_session_globals(|session_globals| unsafe {
-            std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get(*self))
-        })
+        with_session_globals(|session_globals| session_globals.symbol_interner.get(*self))
     }

     pub fn as_u32(self) -> u32 {
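The removed doc comment admitted that "the lifetime of the return value is a lie": the old `as_str` transmuted a `&str` borrowed from the lock-guarded interner into the caller's chosen lifetime. A self-contained illustration of why that pattern is unsound (hypothetical code, not rustc's; a `String` stands in for the interner arena):

// The same move as the removed `std::mem::transmute::<&str, &str>(...)`:
// stretching a borrow past its owner compiles, but can dangle.
fn extend<'a>(s: &'a str) -> &'static str {
    unsafe { std::mem::transmute::<&'a str, &'static str>(s) }
}

fn main() {
    let dangling: &'static str = {
        let arena = String::from("short-lived interner storage");
        extend(&arena)
        // `arena` is dropped at the end of this block, so `dangling` now
        // points at freed memory; any later read is undefined behavior.
    };
    let _ = dangling; // UB if actually dereferenced
}

The new scheme avoids the transmute entirely: interned bytes live in `GLOBAL_ARENA`, which is never deallocated, so `Interner::get` can honestly return `&'static str`.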
@@ -2563,53 +2556,181 @@ impl StableCompare for Symbol {
     }
 }

-pub(crate) struct Interner(Lock<InternerInner>);
+// This is never de-initialized and stores interned &str in static storage.
+// Each str is stored length-prefixed (u32), and we allow for random-access indexing with a u32
+// index by direct lookup in the arena. Indices <2^16 are stored in a separate structure (they are
+// pre-allocated at dense addresses so we can't use the same lockless O(1) hack for them).
+static GLOBAL_ARENA: LazyLock<StringArena> = LazyLock::new(|| StringArena::new());

-// The `&'static str`s in this type actually point into the arena.
-//
-// This type is private to prevent accidentally constructing more than one
-// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
-// between `Interner`s.
-struct InternerInner {
-    arena: DroplessArena,
-    strings: FxIndexSet<&'static str>,
+const CHUNK_SIZE: usize = 4 * 1024 * 1024;
+const CHUNKS: usize = (u32::MAX as usize).div_ceil(CHUNK_SIZE);
+
+struct StringChunk {
+    array: LazyLock<Box<[SyncUnsafeCell<u8>; CHUNK_SIZE]>>,
 }

-impl Interner {
-    fn prefill(init: &[&'static str]) -> Self {
-        Interner(Lock::new(InternerInner {
-            arena: Default::default(),
-            strings: init.iter().copied().collect(),
-        }))
+impl Default for StringChunk {
+    fn default() -> Self {
+        Self {
+            array: LazyLock::new(|| unsafe {
+                // SAFETY: Zero-init'd UnsafeCell<u8> is initialized and has no other invariants to
+                // worry about.
+                Box::new_zeroed().assume_init()
+            }),
+        }
     }
+}

-    #[inline]
-    fn intern(&self, string: &str) -> Symbol {
-        let mut inner = self.0.lock();
-        if let Some(idx) = inner.strings.get_index_of(string) {
-            return Symbol::new(idx as u32);
+struct StringArena {
+    chunks: [StringChunk; CHUNKS],
+    next_start: AtomicU32,
+    interned: elsa::sync::LockFreeFrozenVec<InternedString>,
+}
+
+#[derive(Copy, Clone)]
+struct InternedString {
+    start: u32,
+    length: u32,
+}
+
+impl StringArena {
+    fn new() -> Self {
+        StringArena {
+            chunks: std::array::from_fn(|_| StringChunk::default()),
+            next_start: AtomicU32::new(0),
+            interned: Default::default(),
+        }
+    }
+
+    fn next(previous: u32, length: u32) -> u32 {
+        let end = previous.checked_add(length).unwrap();
+        if previous / CHUNK_SIZE as u32 == end / CHUNK_SIZE as u32 {
+            end
+        } else {
+            // If we don't fit in the previous chunk, bump to the start of the next chunk, and set
+            // length to the end.
+            previous.next_multiple_of(CHUNK_SIZE as u32) + length
+        }
+    }
+
+    /// Copy the passed &str into the arena. Returns an index that can be passed to `get` to
+    /// retrieve the &str.
+    ///
+    /// u32 is guaranteed to be at least u16::MAX.
+    fn alloc(&self, s: &str) -> u32 {
+        let len = u32::try_from(s.len()).unwrap();
+        assert!(len < CHUNK_SIZE as u32);
+
+        let previous = self
+            .next_start
+            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |previous| {
+                Some(Self::next(previous, len))
+            })
+            .unwrap();
+        let end = Self::next(previous, len);
+        let start = end - len;
+
+        let chunk = LazyLock::force(&self.chunks[start as usize / CHUNK_SIZE].array);
+        let offset = start as usize % CHUNK_SIZE;
+
+        // SAFETY:
+        //
+        // * `next_start` only increases, and always uniquely allocates `len` bytes. No one can read
+        //   this memory yet as we haven't pushed yet to `interned`.
+        // * all chunks are zero-init'd at allocation: no uninitialized memory here.
+        let dst = unsafe {
+            std::slice::from_raw_parts_mut(
+                chunk.as_ptr().cast::<u8>().add(offset).cast_mut(),
+                s.len(),
+            )
+        };
+        dst.copy_from_slice(s.as_bytes());
+
+        let idx = self.interned.push(InternedString { start, length: len });
+
+        idx.try_into().unwrap()
+    }
+
+    /// Get the allocated string at the passed index.
+    ///
+    /// Note that this **does not** check that the passed index is actually an index returned by
+    /// `alloc`.
+    fn get(&self, idx: u32) -> &str {
+        let interned = self.interned.get(idx as usize).unwrap_or_else(|| {
+            panic!("non-interned symbol index: {idx}");
+        });
+
+        let chunk = LazyLock::force(&self.chunks[interned.start as usize / CHUNK_SIZE].array);
+        let offset = interned.start as usize % CHUNK_SIZE;
+
+        // We write the string into this memory range prior to pushing into `interned`, so this is
+        // guaranteed UTF-8 and initialized. `next_start` is strictly increasing so we never write
+        // twice.
+        unsafe {
+            std::str::from_raw_parts(
+                chunk.as_ptr().add(offset).cast::<u8>(),
+                interned.length as usize,
+            )
         }
+    }
+}

-        let string: &str = inner.arena.alloc_str(string);
+pub(crate) struct Interner(&'static [&'static str], Lock<InternerInner>);

-        // SAFETY: we can extend the arena allocation to `'static` because we
-        // only access these while the arena is still alive.
-        let string: &'static str = unsafe { &*(string as *const str) };
+struct InternerInner {
+    strings: hashbrown::HashTable<Symbol>,
+}

-        // This second hash table lookup can be avoided by using `RawEntryMut`,
-        // but this code path isn't hot enough for it to be worth it. See
-        // #91445 for details.
-        let (idx, is_new) = inner.strings.insert_full(string);
-        debug_assert!(is_new); // due to the get_index_of check above
+impl Interner {
+    fn prefill(init: &'static [&'static str]) -> Self {
+        assert!(init.len() < u16::MAX as usize);
+        let mut strings = hashbrown::HashTable::new();
+
+        for (idx, s) in init.iter().copied().enumerate() {
+            let mut hasher = FxHasher::default();
+            s.hash(&mut hasher);
+            let hash = hasher.finish();
+            strings.insert_unique(hash, Symbol::new(idx as u32), |val| {
+                // has to be from `init` because we haven't yet inserted anything except those.
+                BuildHasherDefault::<FxHasher>::default().hash_one(init[val.0.index()])
+            });
+        }

-        Symbol::new(idx as u32)
+        Interner(init, Lock::new(InternerInner { strings }))
+    }
+
+    #[inline]
+    fn intern(&self, string: &str) -> Symbol {
+        let hash = BuildHasherDefault::<FxHasher>::default().hash_one(string);
+        let mut inner = self.1.lock();
+        match inner.strings.find_entry(hash, |v| self.get(*v) == string) {
+            Ok(e) => return *e.get(),
+            Err(e) => {
+                let idx = GLOBAL_ARENA.alloc(string);
+                // Reserve 2^16 u32 indices -- these will be used for pre-filled interning where we
+                // have a dense SymbolIndex space. We could make this exact but it doesn't really
+                // matter in practice, we won't run out of symbol space.
+                let idx = u32::from(u16::MAX).checked_add(idx).unwrap();
+                let res = Symbol::new(idx as u32);
+
+                e.into_table().insert_unique(hash, res, |val| {
+                    BuildHasherDefault::<FxHasher>::default().hash_one(self.get(*val))
+                });
+
+                res
+            }
+        }
     }

     /// Get the symbol as a string.
     ///
     /// [`Symbol::as_str()`] should be used in preference to this function.
-    fn get(&self, symbol: Symbol) -> &str {
-        self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap()
+    fn get(&self, symbol: Symbol) -> &'static str {
+        if let Some(interned) = symbol.0.as_u32().checked_sub(u32::from(u16::MAX)) {
+            GLOBAL_ARENA.get(interned)
+        } else {
+            self.0[symbol.0.index()]
+        }
     }
 }
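One subtlety in the committed code is the bump logic in `StringArena::next`: a string never straddles a chunk boundary, so `get` resolves any index with one chunk lookup plus an offset, at the cost of a little padding at chunk ends. A standalone reproduction of exactly that arithmetic, shrunk to a 16-byte chunk so the numbers are readable (the committed constant is 4 MiB):

// Same bump logic as `StringArena::next`, with a toy chunk size.
const CHUNK_SIZE: u32 = 16;

fn next(previous: u32, length: u32) -> u32 {
    let end = previous.checked_add(length).unwrap();
    if previous / CHUNK_SIZE == end / CHUNK_SIZE {
        end
    } else {
        // Doesn't fit in the current chunk: skip ahead to the next chunk
        // start, leaving the tail of the current chunk as unused padding.
        previous.next_multiple_of(CHUNK_SIZE) + length
    }
}

fn main() {
    // Fits in chunk 0: bytes 10..14 are used, the cursor moves to 14.
    assert_eq!(next(10, 4), 14);
    // A 6-byte string at cursor 14 would straddle the 16-byte boundary, so
    // it is placed at 16..22 instead (start = end - length = 16).
    assert_eq!(next(14, 6), 22);
}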

‎compiler/rustc_span/src/symbol/tests.rs

+7 -5

@@ -4,15 +4,17 @@ use crate::create_default_session_globals_then;
 #[test]
 fn interner_tests() {
     let i = Interner::prefill(&[]);
+    let dog = i.intern("dog");
     // first one is zero:
-    assert_eq!(i.intern("dog"), Symbol::new(0));
+    assert_eq!(i.intern("dog"), dog);
     // re-use gets the same entry:
-    assert_eq!(i.intern("dog"), Symbol::new(0));
+    assert_eq!(i.intern("dog"), dog);
     // different string gets a different #:
-    assert_eq!(i.intern("cat"), Symbol::new(1));
-    assert_eq!(i.intern("cat"), Symbol::new(1));
+    let cat = i.intern("cat");
+    assert_eq!(i.intern("cat"), cat);
+    assert_eq!(i.intern("cat"), cat);
     // dog is still at zero
-    assert_eq!(i.intern("dog"), Symbol::new(0));
+    assert_eq!(i.intern("dog"), dog);
 }

 #[test]
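Why the hard-coded `Symbol::new(0)`/`Symbol::new(1)` assertions had to go: `GLOBAL_ARENA` is process-wide, so the index a fresh `Interner` hands out depends on everything interned earlier in the same test process; only identity within one interner is stable. A sketch of an extra assertion that would still hold under the new scheme (hypothetical test, relying on the `u16::MAX` offset visible in `Interner::intern` above):

#[test]
fn dynamic_symbols_sit_above_prefill_range() {
    let i = Interner::prefill(&[]);
    let dog = i.intern("dog");
    // Stable within this interner...
    assert_eq!(i.intern("dog"), dog);
    // ...and always above the 2^16 indices reserved for prefilled symbols,
    // though the exact value depends on global interning order.
    assert!(dog.as_u32() >= u32::from(u16::MAX));
}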
