Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b517457

Browse files
committedSep 13, 2024
Auto merge of rust-lang#130324 - petrochenkov:ctxtache, r=<try>
hygiene: Ensure uniqueness of `SyntaxContextData`s `SyntaxContextData`s are basically interned with `SyntaxContext`s working as keys, so they are supposed to be unique. However, currently duplicate `SyntaxContextData`s can be created during decoding from metadata or incremental cache. This PR fixes that. cc rust-lang#129827 (comment)
2 parents 0307e40 + e577b7a commit b517457

File tree

1 file changed

+107
-70
lines changed

1 file changed

+107
-70
lines changed
 

‎compiler/rustc_span/src/hygiene.rs

+107-70
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@
2626

2727
use std::cell::RefCell;
2828
use std::collections::hash_map::Entry;
29-
use std::fmt;
3029
use std::hash::Hash;
30+
use std::{fmt, iter, mem};
3131

3232
use rustc_data_structures::fingerprint::Fingerprint;
3333
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
@@ -54,7 +54,11 @@ pub struct SyntaxContext(u32);
5454
impl !Ord for SyntaxContext {}
5555
impl !PartialOrd for SyntaxContext {}
5656

57-
#[derive(Debug, Encodable, Decodable, Clone)]
57+
/// If this part of two syntax contexts is equal, then the whole syntax contexts should be equal.
58+
/// The other fields are only for caching.
59+
type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);
60+
61+
#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable)]
5862
pub struct SyntaxContextData {
5963
outer_expn: ExpnId,
6064
outer_transparency: Transparency,
@@ -67,6 +71,31 @@ pub struct SyntaxContextData {
6771
dollar_crate_name: Symbol,
6872
}
6973

74+
impl SyntaxContextData {
75+
fn root() -> SyntaxContextData {
76+
SyntaxContextData {
77+
outer_expn: ExpnId::root(),
78+
outer_transparency: Transparency::Opaque,
79+
parent: SyntaxContext::root(),
80+
opaque: SyntaxContext::root(),
81+
opaque_and_semitransparent: SyntaxContext::root(),
82+
dollar_crate_name: kw::DollarCrate,
83+
}
84+
}
85+
86+
fn decode_placeholder() -> SyntaxContextData {
87+
SyntaxContextData { dollar_crate_name: kw::Empty, ..SyntaxContextData::root() }
88+
}
89+
90+
fn is_decode_placeholder(&self) -> bool {
91+
self.dollar_crate_name == kw::Empty
92+
}
93+
94+
fn key(&self) -> SyntaxContextKey {
95+
(self.parent, self.outer_expn, self.outer_transparency)
96+
}
97+
}
98+
7099
rustc_index::newtype_index! {
71100
/// A unique ID associated with a macro invocation and expansion.
72101
#[orderable]
@@ -333,7 +362,7 @@ pub(crate) struct HygieneData {
333362
foreign_expn_hashes: FxHashMap<ExpnId, ExpnHash>,
334363
expn_hash_to_expn_id: UnhashMap<ExpnHash, ExpnId>,
335364
syntax_context_data: Vec<SyntaxContextData>,
336-
syntax_context_map: FxHashMap<(SyntaxContext, ExpnId, Transparency), SyntaxContext>,
365+
syntax_context_map: FxHashMap<SyntaxContextKey, SyntaxContext>,
337366
/// Maps the `local_hash` of an `ExpnData` to the next disambiguator value.
338367
/// This is used by `update_disambiguator` to keep track of which `ExpnData`s
339368
/// would have collisions without a disambiguator.
@@ -352,22 +381,16 @@ impl HygieneData {
352381
None,
353382
);
354383

384+
let root_ctxt_data = SyntaxContextData::root();
355385
HygieneData {
356386
local_expn_data: IndexVec::from_elem_n(Some(root_data), 1),
357387
local_expn_hashes: IndexVec::from_elem_n(ExpnHash(Fingerprint::ZERO), 1),
358388
foreign_expn_data: FxHashMap::default(),
359389
foreign_expn_hashes: FxHashMap::default(),
360-
expn_hash_to_expn_id: std::iter::once((ExpnHash(Fingerprint::ZERO), ExpnId::root()))
390+
expn_hash_to_expn_id: iter::once((ExpnHash(Fingerprint::ZERO), ExpnId::root()))
361391
.collect(),
362-
syntax_context_data: vec![SyntaxContextData {
363-
outer_expn: ExpnId::root(),
364-
outer_transparency: Transparency::Opaque,
365-
parent: SyntaxContext(0),
366-
opaque: SyntaxContext(0),
367-
opaque_and_semitransparent: SyntaxContext(0),
368-
dollar_crate_name: kw::DollarCrate,
369-
}],
370-
syntax_context_map: FxHashMap::default(),
392+
syntax_context_data: vec![root_ctxt_data],
393+
syntax_context_map: iter::once((root_ctxt_data.key(), SyntaxContext(0))).collect(),
371394
expn_data_disambiguators: UnhashMap::default(),
372395
}
373396
}
@@ -416,23 +439,28 @@ impl HygieneData {
416439
}
417440

418441
fn normalize_to_macros_2_0(&self, ctxt: SyntaxContext) -> SyntaxContext {
442+
assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
419443
self.syntax_context_data[ctxt.0 as usize].opaque
420444
}
421445

422446
fn normalize_to_macro_rules(&self, ctxt: SyntaxContext) -> SyntaxContext {
447+
assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
423448
self.syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent
424449
}
425450

426451
fn outer_expn(&self, ctxt: SyntaxContext) -> ExpnId {
452+
assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
427453
self.syntax_context_data[ctxt.0 as usize].outer_expn
428454
}
429455

430456
fn outer_mark(&self, ctxt: SyntaxContext) -> (ExpnId, Transparency) {
457+
assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
431458
let data = &self.syntax_context_data[ctxt.0 as usize];
432459
(data.outer_expn, data.outer_transparency)
433460
}
434461

435462
fn parent_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContext {
463+
assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
436464
self.syntax_context_data[ctxt.0 as usize].parent
437465
}
438466

@@ -542,6 +570,7 @@ impl HygieneData {
542570
transparency: Transparency,
543571
) -> SyntaxContext {
544572
let syntax_context_data = &mut self.syntax_context_data;
573+
assert!(!syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
545574
let mut opaque = syntax_context_data[ctxt.0 as usize].opaque;
546575
let mut opaque_and_semitransparent =
547576
syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent;
@@ -552,7 +581,7 @@ impl HygieneData {
552581
.syntax_context_map
553582
.entry((parent, expn_id, transparency))
554583
.or_insert_with(|| {
555-
let new_opaque = SyntaxContext(syntax_context_data.len() as u32);
584+
let new_opaque = SyntaxContext::from_usize(syntax_context_data.len());
556585
syntax_context_data.push(SyntaxContextData {
557586
outer_expn: expn_id,
558587
outer_transparency: transparency,
@@ -572,7 +601,7 @@ impl HygieneData {
572601
.entry((parent, expn_id, transparency))
573602
.or_insert_with(|| {
574603
let new_opaque_and_semitransparent =
575-
SyntaxContext(syntax_context_data.len() as u32);
604+
SyntaxContext::from_usize(syntax_context_data.len());
576605
syntax_context_data.push(SyntaxContextData {
577606
outer_expn: expn_id,
578607
outer_transparency: transparency,
@@ -587,8 +616,6 @@ impl HygieneData {
587616

588617
let parent = ctxt;
589618
*self.syntax_context_map.entry((parent, expn_id, transparency)).or_insert_with(|| {
590-
let new_opaque_and_semitransparent_and_transparent =
591-
SyntaxContext(syntax_context_data.len() as u32);
592619
syntax_context_data.push(SyntaxContextData {
593620
outer_expn: expn_id,
594621
outer_transparency: transparency,
@@ -597,7 +624,7 @@ impl HygieneData {
597624
opaque_and_semitransparent,
598625
dollar_crate_name: kw::DollarCrate,
599626
});
600-
new_opaque_and_semitransparent_and_transparent
627+
SyntaxContext::from_usize(syntax_context_data.len() - 1)
601628
})
602629
}
603630
}
@@ -704,6 +731,10 @@ impl SyntaxContext {
704731
SyntaxContext(raw as u32)
705732
}
706733

734+
fn from_usize(raw: usize) -> SyntaxContext {
735+
SyntaxContext(u32::try_from(raw).unwrap())
736+
}
737+
707738
/// Extend a syntax context with a given expansion and transparency.
708739
pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> SyntaxContext {
709740
HygieneData::with(|data| data.apply_mark(self, expn_id, transparency))
@@ -884,7 +915,10 @@ impl SyntaxContext {
884915
}
885916

886917
pub(crate) fn dollar_crate_name(self) -> Symbol {
887-
HygieneData::with(|data| data.syntax_context_data[self.0 as usize].dollar_crate_name)
918+
HygieneData::with(|data| {
919+
assert!(!data.syntax_context_data[self.0 as usize].is_decode_placeholder());
920+
data.syntax_context_data[self.0 as usize].dollar_crate_name
921+
})
888922
}
889923

890924
pub fn edition(self) -> Edition {
@@ -1224,7 +1258,7 @@ impl HygieneEncodeContext {
12241258

12251259
// Consume the current round of SyntaxContexts.
12261260
// Drop the lock() temporary early
1227-
let latest_ctxts = { std::mem::take(&mut *self.latest_ctxts.lock()) };
1261+
let latest_ctxts = { mem::take(&mut *self.latest_ctxts.lock()) };
12281262

12291263
// It's fine to iterate over a HashMap, because the serialization
12301264
// of the table that we insert data into doesn't depend on insertion
@@ -1236,7 +1270,7 @@ impl HygieneEncodeContext {
12361270
}
12371271
});
12381272

1239-
let latest_expns = { std::mem::take(&mut *self.latest_expns.lock()) };
1273+
let latest_expns = { mem::take(&mut *self.latest_expns.lock()) };
12401274

12411275
// Same as above, this is fine as we are inserting into a order-independent hashset
12421276
#[allow(rustc::potential_query_instability)]
@@ -1270,6 +1304,7 @@ pub struct HygieneDecodeContext {
12701304
inner: Lock<HygieneDecodeContextInner>,
12711305

12721306
/// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread.
1307+
/// Uses a hash map instead of hash set to use entry APIs.
12731308
local_in_progress: WorkerLocal<RefCell<FxHashMap<u32, ()>>>,
12741309
}
12751310

@@ -1354,28 +1389,33 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
13541389
return SyntaxContext::root();
13551390
}
13561391

1357-
let ctxt = {
1392+
let pending_ctxt = {
13581393
let mut inner = context.inner.lock();
13591394

1395+
// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
1396+
// raw ids from different crate metadatas.
13601397
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
13611398
// This has already been decoded.
13621399
return ctxt;
13631400
}
13641401

13651402
match inner.decoding.entry(raw_id) {
13661403
Entry::Occupied(ctxt_entry) => {
1404+
let pending_ctxt = *ctxt_entry.get();
13671405
match context.local_in_progress.borrow_mut().entry(raw_id) {
1368-
Entry::Occupied(..) => {
1369-
// We're decoding this already on the current thread. Return here
1370-
// and let the function higher up the stack finish decoding to handle
1371-
// recursive cases.
1372-
return *ctxt_entry.get();
1373-
}
1406+
// We're decoding this already on the current thread. Return here and let the
1407+
// function higher up the stack finish decoding to handle recursive cases.
1408+
// Hopefully having a `SyntaxContext` that refers to an incorrect data is ok
1409+
// during reminder of the decoding process, it's certainly not ok after the
1410+
// top level decoding function returns.
1411+
Entry::Occupied(..) => return pending_ctxt,
1412+
// Some other thread is current decoding this.
1413+
// Race with it (alternatively we could wait here).
1414+
// We cannot return this value, unlike in the recursive case above, because it
1415+
// may expose a `SyntaxContext` pointing to incorrect data to arbitrary code.
13741416
Entry::Vacant(entry) => {
13751417
entry.insert(());
1376-
1377-
// Some other thread is current decoding this. Race with it.
1378-
*ctxt_entry.get()
1418+
pending_ctxt
13791419
}
13801420
}
13811421
}
@@ -1386,18 +1426,10 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
13861426
// Allocate and store SyntaxContext id *before* calling the decoder function,
13871427
// as the SyntaxContextData may reference itself.
13881428
let new_ctxt = HygieneData::with(|hygiene_data| {
1389-
let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
13901429
// Push a dummy SyntaxContextData to ensure that nobody else can get the
1391-
// same ID as us. This will be overwritten after call `decode_Data`
1392-
hygiene_data.syntax_context_data.push(SyntaxContextData {
1393-
outer_expn: ExpnId::root(),
1394-
outer_transparency: Transparency::Transparent,
1395-
parent: SyntaxContext::root(),
1396-
opaque: SyntaxContext::root(),
1397-
opaque_and_semitransparent: SyntaxContext::root(),
1398-
dollar_crate_name: kw::Empty,
1399-
});
1400-
new_ctxt
1430+
// same ID as us. This will be overwritten after call `decode_data`.
1431+
hygiene_data.syntax_context_data.push(SyntaxContextData::decode_placeholder());
1432+
SyntaxContext::from_usize(hygiene_data.syntax_context_data.len() - 1)
14011433
});
14021434
entry.insert(new_ctxt);
14031435
new_ctxt
@@ -1407,38 +1439,43 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
14071439

14081440
// Don't try to decode data while holding the lock, since we need to
14091441
// be able to recursively decode a SyntaxContext
1410-
let mut ctxt_data = decode_data(d, raw_id);
1411-
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`
1412-
// We don't care what the encoding crate set this to - we want to resolve it
1413-
// from the perspective of the current compilation session
1414-
ctxt_data.dollar_crate_name = kw::DollarCrate;
1415-
1416-
// Overwrite the dummy data with our decoded SyntaxContextData
1417-
HygieneData::with(|hygiene_data| {
1418-
if let Some(old) = hygiene_data.syntax_context_data.get(raw_id as usize)
1419-
&& old.outer_expn == ctxt_data.outer_expn
1420-
&& old.outer_transparency == ctxt_data.outer_transparency
1421-
&& old.parent == ctxt_data.parent
1422-
{
1423-
ctxt_data = old.clone();
1424-
}
1425-
1426-
let dummy = std::mem::replace(
1427-
&mut hygiene_data.syntax_context_data[ctxt.as_u32() as usize],
1428-
ctxt_data,
1429-
);
1430-
if cfg!(not(parallel_compiler)) {
1431-
// Make sure nothing weird happened while `decode_data` was running.
1432-
// We used `kw::Empty` for the dummy value and we expect nothing to be
1433-
// modifying the dummy entry.
1434-
// This does not hold for the parallel compiler as another thread may
1435-
// have inserted the fully decoded data.
1436-
assert_eq!(dummy.dollar_crate_name, kw::Empty);
1442+
let ctxt_data = decode_data(d, raw_id);
1443+
let ctxt_key = ctxt_data.key();
1444+
1445+
let ctxt = HygieneData::with(|hygiene_data| {
1446+
match hygiene_data.syntax_context_map.get(&ctxt_key) {
1447+
// Ensure that syntax contexts are unique.
1448+
// If syntax contexts with the given key already exists, reuse it instead of
1449+
// using `pending_ctxt`.
1450+
// `pending_ctxt` will leave an unused hole in the vector of syntax contexts.
1451+
// Hopefully its value isn't stored anywhere during decoding and its dummy data
1452+
// is never accessed later. The `is_decode_placeholder` asserts on all
1453+
// accesses to syntax context data attempt to ensure it.
1454+
Some(&ctxt) => ctxt,
1455+
// This is a completely new context.
1456+
// Overwrite its placeholder data with our decoded data.
1457+
None => {
1458+
let ctxt_data_ref =
1459+
&mut hygiene_data.syntax_context_data[pending_ctxt.as_u32() as usize];
1460+
let prev_ctxt_data = mem::replace(ctxt_data_ref, ctxt_data);
1461+
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`.
1462+
// We don't care what the encoding crate set this to - we want to resolve it
1463+
// from the perspective of the current compilation session
1464+
ctxt_data_ref.dollar_crate_name = kw::DollarCrate;
1465+
// Make sure nothing weird happened while `decode_data` was running.
1466+
if !prev_ctxt_data.is_decode_placeholder() {
1467+
// With parallel compiler another thread may have already inserted the decoded
1468+
// data, but the decoded data should match.
1469+
assert!(cfg!(parallel_compiler));
1470+
assert_eq!(prev_ctxt_data, *ctxt_data_ref);
1471+
}
1472+
hygiene_data.syntax_context_map.insert(ctxt_key, pending_ctxt);
1473+
pending_ctxt
1474+
}
14371475
}
14381476
});
14391477

14401478
// Mark the context as completed
1441-
14421479
context.local_in_progress.borrow_mut().remove(&raw_id);
14431480

14441481
let mut inner = context.inner.lock();

0 commit comments

Comments
 (0)
Failed to load comments.