Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 75fac58

Browse files
committedMar 15, 2025
Auto merge of rust-lang#130324 - petrochenkov:ctxtache, r=<try>
hygiene: Ensure uniqueness of `SyntaxContextData`s `SyntaxContextData`s are basically interned with `SyntaxContext`s working as indices, so they are supposed to be unique. However, currently duplicate `SyntaxContextData`s can be created during decoding from metadata or incremental cache. This PR fixes that. cc rust-lang#129827 (comment)
2 parents adea7cb + 55d9fe3 commit 75fac58

File tree

1 file changed

+120
-73
lines changed

1 file changed

+120
-73
lines changed
 

‎compiler/rustc_span/src/hygiene.rs

+120-73
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@
2727
use std::cell::RefCell;
2828
use std::collections::hash_map::Entry;
2929
use std::collections::hash_set::Entry as SetEntry;
30-
use std::fmt;
3130
use std::hash::Hash;
3231
use std::sync::Arc;
32+
use std::{fmt, iter, mem};
3333

3434
use rustc_data_structures::fingerprint::Fingerprint;
3535
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
@@ -57,7 +57,11 @@ pub struct SyntaxContext(u32);
5757
impl !Ord for SyntaxContext {}
5858
impl !PartialOrd for SyntaxContext {}
5959

60-
#[derive(Debug, Encodable, Decodable, Clone)]
60+
/// If this part of two syntax contexts is equal, then the whole syntax contexts should be equal.
61+
/// The other fields are only for caching.
62+
type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);
63+
64+
#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable)]
6165
pub struct SyntaxContextData {
6266
outer_expn: ExpnId,
6367
outer_transparency: Transparency,
@@ -70,6 +74,31 @@ pub struct SyntaxContextData {
7074
dollar_crate_name: Symbol,
7175
}
7276

77+
impl SyntaxContextData {
78+
fn root() -> SyntaxContextData {
79+
SyntaxContextData {
80+
outer_expn: ExpnId::root(),
81+
outer_transparency: Transparency::Opaque,
82+
parent: SyntaxContext::root(),
83+
opaque: SyntaxContext::root(),
84+
opaque_and_semitransparent: SyntaxContext::root(),
85+
dollar_crate_name: kw::DollarCrate,
86+
}
87+
}
88+
89+
fn decode_placeholder() -> SyntaxContextData {
90+
SyntaxContextData { dollar_crate_name: kw::Empty, ..SyntaxContextData::root() }
91+
}
92+
93+
fn is_decode_placeholder(&self) -> bool {
94+
self.dollar_crate_name == kw::Empty
95+
}
96+
97+
fn key(&self) -> SyntaxContextKey {
98+
(self.parent, self.outer_expn, self.outer_transparency)
99+
}
100+
}
101+
73102
rustc_index::newtype_index! {
74103
/// A unique ID associated with a macro invocation and expansion.
75104
#[orderable]
@@ -342,7 +371,7 @@ pub(crate) struct HygieneData {
342371
foreign_expn_hashes: FxHashMap<ExpnId, ExpnHash>,
343372
expn_hash_to_expn_id: UnhashMap<ExpnHash, ExpnId>,
344373
syntax_context_data: Vec<SyntaxContextData>,
345-
syntax_context_map: FxHashMap<(SyntaxContext, ExpnId, Transparency), SyntaxContext>,
374+
syntax_context_map: FxHashMap<SyntaxContextKey, SyntaxContext>,
346375
/// Maps the `local_hash` of an `ExpnData` to the next disambiguator value.
347376
/// This is used by `update_disambiguator` to keep track of which `ExpnData`s
348377
/// would have collisions without a disambiguator.
@@ -361,22 +390,16 @@ impl HygieneData {
361390
None,
362391
);
363392

393+
let root_ctxt_data = SyntaxContextData::root();
364394
HygieneData {
365395
local_expn_data: IndexVec::from_elem_n(Some(root_data), 1),
366396
local_expn_hashes: IndexVec::from_elem_n(ExpnHash(Fingerprint::ZERO), 1),
367397
foreign_expn_data: FxHashMap::default(),
368398
foreign_expn_hashes: FxHashMap::default(),
369-
expn_hash_to_expn_id: std::iter::once((ExpnHash(Fingerprint::ZERO), ExpnId::root()))
399+
expn_hash_to_expn_id: iter::once((ExpnHash(Fingerprint::ZERO), ExpnId::root()))
370400
.collect(),
371-
syntax_context_data: vec![SyntaxContextData {
372-
outer_expn: ExpnId::root(),
373-
outer_transparency: Transparency::Opaque,
374-
parent: SyntaxContext(0),
375-
opaque: SyntaxContext(0),
376-
opaque_and_semitransparent: SyntaxContext(0),
377-
dollar_crate_name: kw::DollarCrate,
378-
}],
379-
syntax_context_map: FxHashMap::default(),
401+
syntax_context_data: vec![root_ctxt_data],
402+
syntax_context_map: iter::once((root_ctxt_data.key(), SyntaxContext(0))).collect(),
380403
expn_data_disambiguators: UnhashMap::default(),
381404
}
382405
}
@@ -425,23 +448,28 @@ impl HygieneData {
425448
}
426449

427450
fn normalize_to_macros_2_0(&self, ctxt: SyntaxContext) -> SyntaxContext {
451+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
428452
self.syntax_context_data[ctxt.0 as usize].opaque
429453
}
430454

431455
fn normalize_to_macro_rules(&self, ctxt: SyntaxContext) -> SyntaxContext {
456+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
432457
self.syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent
433458
}
434459

435460
fn outer_expn(&self, ctxt: SyntaxContext) -> ExpnId {
461+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
436462
self.syntax_context_data[ctxt.0 as usize].outer_expn
437463
}
438464

439465
fn outer_mark(&self, ctxt: SyntaxContext) -> (ExpnId, Transparency) {
466+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
440467
let data = &self.syntax_context_data[ctxt.0 as usize];
441468
(data.outer_expn, data.outer_transparency)
442469
}
443470

444471
fn parent_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContext {
472+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
445473
self.syntax_context_data[ctxt.0 as usize].parent
446474
}
447475

@@ -551,6 +579,7 @@ impl HygieneData {
551579
transparency: Transparency,
552580
) -> SyntaxContext {
553581
let syntax_context_data = &mut self.syntax_context_data;
582+
debug_assert!(!syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
554583
let mut opaque = syntax_context_data[ctxt.0 as usize].opaque;
555584
let mut opaque_and_semitransparent =
556585
syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent;
@@ -561,7 +590,7 @@ impl HygieneData {
561590
.syntax_context_map
562591
.entry((parent, expn_id, transparency))
563592
.or_insert_with(|| {
564-
let new_opaque = SyntaxContext(syntax_context_data.len() as u32);
593+
let new_opaque = SyntaxContext::from_usize(syntax_context_data.len());
565594
syntax_context_data.push(SyntaxContextData {
566595
outer_expn: expn_id,
567596
outer_transparency: transparency,
@@ -581,7 +610,7 @@ impl HygieneData {
581610
.entry((parent, expn_id, transparency))
582611
.or_insert_with(|| {
583612
let new_opaque_and_semitransparent =
584-
SyntaxContext(syntax_context_data.len() as u32);
613+
SyntaxContext::from_usize(syntax_context_data.len());
585614
syntax_context_data.push(SyntaxContextData {
586615
outer_expn: expn_id,
587616
outer_transparency: transparency,
@@ -596,8 +625,6 @@ impl HygieneData {
596625

597626
let parent = ctxt;
598627
*self.syntax_context_map.entry((parent, expn_id, transparency)).or_insert_with(|| {
599-
let new_opaque_and_semitransparent_and_transparent =
600-
SyntaxContext(syntax_context_data.len() as u32);
601628
syntax_context_data.push(SyntaxContextData {
602629
outer_expn: expn_id,
603630
outer_transparency: transparency,
@@ -606,7 +633,7 @@ impl HygieneData {
606633
opaque_and_semitransparent,
607634
dollar_crate_name: kw::DollarCrate,
608635
});
609-
new_opaque_and_semitransparent_and_transparent
636+
SyntaxContext::from_usize(syntax_context_data.len() - 1)
610637
})
611638
}
612639
}
@@ -626,25 +653,26 @@ pub fn walk_chain_collapsed(span: Span, to: Span) -> Span {
626653

627654
pub fn update_dollar_crate_names(mut get_name: impl FnMut(SyntaxContext) -> Symbol) {
628655
// The new contexts that need updating are at the end of the list and have `$crate` as a name.
629-
let (len, to_update) = HygieneData::with(|data| {
630-
(
631-
data.syntax_context_data.len(),
632-
data.syntax_context_data
633-
.iter()
634-
.rev()
635-
.take_while(|scdata| scdata.dollar_crate_name == kw::DollarCrate)
636-
.count(),
637-
)
656+
// Also decoding placeholders can be encountered among both old and new contexts.
657+
let mut to_update = vec![];
658+
HygieneData::with(|data| {
659+
for (idx, scdata) in data.syntax_context_data.iter().enumerate().rev() {
660+
if scdata.dollar_crate_name == kw::DollarCrate {
661+
to_update.push((idx, kw::DollarCrate));
662+
} else if !scdata.is_decode_placeholder() {
663+
break;
664+
}
665+
}
638666
});
639667
// The callback must be called from outside of the `HygieneData` lock,
640668
// since it will try to acquire it too.
641-
let range_to_update = len - to_update..len;
642-
let names: Vec<_> =
643-
range_to_update.clone().map(|idx| get_name(SyntaxContext::from_u32(idx as u32))).collect();
669+
for (idx, name) in &mut to_update {
670+
*name = get_name(SyntaxContext::from_usize(*idx));
671+
}
644672
HygieneData::with(|data| {
645-
range_to_update.zip(names).for_each(|(idx, name)| {
673+
for (idx, name) in to_update {
646674
data.syntax_context_data[idx].dollar_crate_name = name;
647-
})
675+
}
648676
})
649677
}
650678

@@ -713,6 +741,10 @@ impl SyntaxContext {
713741
SyntaxContext(raw as u32)
714742
}
715743

744+
fn from_usize(raw: usize) -> SyntaxContext {
745+
SyntaxContext(u32::try_from(raw).unwrap())
746+
}
747+
716748
/// Extend a syntax context with a given expansion and transparency.
717749
pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> SyntaxContext {
718750
HygieneData::with(|data| data.apply_mark(self, expn_id, transparency))
@@ -893,7 +925,10 @@ impl SyntaxContext {
893925
}
894926

895927
pub(crate) fn dollar_crate_name(self) -> Symbol {
896-
HygieneData::with(|data| data.syntax_context_data[self.0 as usize].dollar_crate_name)
928+
HygieneData::with(|data| {
929+
debug_assert!(!data.syntax_context_data[self.0 as usize].is_decode_placeholder());
930+
data.syntax_context_data[self.0 as usize].dollar_crate_name
931+
})
897932
}
898933

899934
pub fn edition(self) -> Edition {
@@ -1244,7 +1279,7 @@ impl HygieneEncodeContext {
12441279

12451280
// Consume the current round of SyntaxContexts.
12461281
// Drop the lock() temporary early
1247-
let latest_ctxts = { std::mem::take(&mut *self.latest_ctxts.lock()) };
1282+
let latest_ctxts = { mem::take(&mut *self.latest_ctxts.lock()) };
12481283

12491284
// It's fine to iterate over a HashMap, because the serialization
12501285
// of the table that we insert data into doesn't depend on insertion
@@ -1256,7 +1291,7 @@ impl HygieneEncodeContext {
12561291
}
12571292
});
12581293

1259-
let latest_expns = { std::mem::take(&mut *self.latest_expns.lock()) };
1294+
let latest_expns = { mem::take(&mut *self.latest_expns.lock()) };
12601295

12611296
// Same as above, this is fine as we are inserting into a order-independent hashset
12621297
#[allow(rustc::potential_query_instability)]
@@ -1373,28 +1408,33 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
13731408
return SyntaxContext::root();
13741409
}
13751410

1376-
let ctxt = {
1411+
let pending_ctxt = {
13771412
let mut inner = context.inner.lock();
13781413

1414+
// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
1415+
// raw ids from different crate metadatas.
13791416
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
13801417
// This has already been decoded.
13811418
return ctxt;
13821419
}
13831420

13841421
match inner.decoding.entry(raw_id) {
13851422
Entry::Occupied(ctxt_entry) => {
1423+
let pending_ctxt = *ctxt_entry.get();
13861424
match context.local_in_progress.borrow_mut().entry(raw_id) {
1387-
SetEntry::Occupied(..) => {
1388-
// We're decoding this already on the current thread. Return here
1389-
// and let the function higher up the stack finish decoding to handle
1390-
// recursive cases.
1391-
return *ctxt_entry.get();
1392-
}
1425+
// We're decoding this already on the current thread. Return here and let the
1426+
// function higher up the stack finish decoding to handle recursive cases.
1427+
// Hopefully having a `SyntaxContext` that refers to an incorrect data is ok
1428+
// during reminder of the decoding process, it's certainly not ok after the
1429+
// top level decoding function returns.
1430+
SetEntry::Occupied(..) => return pending_ctxt,
1431+
// Some other thread is currently decoding this.
1432+
// Race with it (alternatively we could wait here).
1433+
// We cannot return this value, unlike in the recursive case above, because it
1434+
// may expose a `SyntaxContext` pointing to incorrect data to arbitrary code.
13931435
SetEntry::Vacant(entry) => {
13941436
entry.insert();
1395-
1396-
// Some other thread is current decoding this. Race with it.
1397-
*ctxt_entry.get()
1437+
pending_ctxt
13981438
}
13991439
}
14001440
}
@@ -1405,18 +1445,10 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
14051445
// Allocate and store SyntaxContext id *before* calling the decoder function,
14061446
// as the SyntaxContextData may reference itself.
14071447
let new_ctxt = HygieneData::with(|hygiene_data| {
1408-
let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
14091448
// Push a dummy SyntaxContextData to ensure that nobody else can get the
1410-
// same ID as us. This will be overwritten after call `decode_Data`
1411-
hygiene_data.syntax_context_data.push(SyntaxContextData {
1412-
outer_expn: ExpnId::root(),
1413-
outer_transparency: Transparency::Transparent,
1414-
parent: SyntaxContext::root(),
1415-
opaque: SyntaxContext::root(),
1416-
opaque_and_semitransparent: SyntaxContext::root(),
1417-
dollar_crate_name: kw::Empty,
1418-
});
1419-
new_ctxt
1449+
// same ID as us. This will be overwritten after call `decode_data`.
1450+
hygiene_data.syntax_context_data.push(SyntaxContextData::decode_placeholder());
1451+
SyntaxContext::from_usize(hygiene_data.syntax_context_data.len() - 1)
14201452
});
14211453
entry.insert(new_ctxt);
14221454
new_ctxt
@@ -1426,27 +1458,42 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
14261458

14271459
// Don't try to decode data while holding the lock, since we need to
14281460
// be able to recursively decode a SyntaxContext
1429-
let mut ctxt_data = decode_data(d, raw_id);
1430-
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`
1431-
// We don't care what the encoding crate set this to - we want to resolve it
1432-
// from the perspective of the current compilation session
1433-
ctxt_data.dollar_crate_name = kw::DollarCrate;
1434-
1435-
// Overwrite the dummy data with our decoded SyntaxContextData
1436-
HygieneData::with(|hygiene_data| {
1437-
if let Some(old) = hygiene_data.syntax_context_data.get(raw_id as usize)
1438-
&& old.outer_expn == ctxt_data.outer_expn
1439-
&& old.outer_transparency == ctxt_data.outer_transparency
1440-
&& old.parent == ctxt_data.parent
1441-
{
1442-
ctxt_data = old.clone();
1461+
let ctxt_data = decode_data(d, raw_id);
1462+
let ctxt_key = ctxt_data.key();
1463+
1464+
let ctxt = HygieneData::with(|hygiene_data| {
1465+
match hygiene_data.syntax_context_map.get(&ctxt_key) {
1466+
// Ensure that syntax contexts are unique.
1467+
// If syntax contexts with the given key already exists, reuse it instead of
1468+
// using `pending_ctxt`.
1469+
// `pending_ctxt` will leave an unused hole in the vector of syntax contexts.
1470+
// Hopefully its value isn't stored anywhere during decoding and its dummy data
1471+
// is never accessed later. The `is_decode_placeholder` asserts on all
1472+
// accesses to syntax context data attempt to ensure it.
1473+
Some(&ctxt) => ctxt,
1474+
// This is a completely new context.
1475+
// Overwrite its placeholder data with our decoded data.
1476+
None => {
1477+
let ctxt_data_ref =
1478+
&mut hygiene_data.syntax_context_data[pending_ctxt.as_u32() as usize];
1479+
let prev_ctxt_data = mem::replace(ctxt_data_ref, ctxt_data);
1480+
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`.
1481+
// We don't care what the encoding crate set this to - we want to resolve it
1482+
// from the perspective of the current compilation session.
1483+
ctxt_data_ref.dollar_crate_name = kw::DollarCrate;
1484+
// Make sure nothing weird happened while `decode_data` was running.
1485+
if !prev_ctxt_data.is_decode_placeholder() {
1486+
// Another thread may have already inserted the decoded data,
1487+
// but the decoded data should match.
1488+
assert_eq!(prev_ctxt_data, *ctxt_data_ref);
1489+
}
1490+
hygiene_data.syntax_context_map.insert(ctxt_key, pending_ctxt);
1491+
pending_ctxt
1492+
}
14431493
}
1444-
1445-
hygiene_data.syntax_context_data[ctxt.as_u32() as usize] = ctxt_data;
14461494
});
14471495

14481496
// Mark the context as completed
1449-
14501497
context.local_in_progress.borrow_mut().remove(&raw_id);
14511498

14521499
let mut inner = context.inner.lock();

0 commit comments

Comments
 (0)
Failed to load comments.