Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 08dfbf4

Browse files
authoredMar 17, 2025
Rollup merge of rust-lang#136355 - GuillaumeGomez:proc-macro_add_value_retrieval_methods, r=Amanieu
Add `*_value` methods to proc_macro lib. This is the implementation of rust-lang/libs-team#459. It allows retrieving the actual (unescaped) value of the different string literals. Part of rust-lang#136652. r? libs-api
2 parents 10bcdad + 4394f94 commit 08dfbf4

File tree

27 files changed

+253
-15
lines changed

27 files changed

+253
-15
lines changed
 

‎Cargo.lock

+17
Original file line numberDiff line numberDiff line change
@@ -2082,6 +2082,13 @@ version = "0.7.4"
20822082
source = "registry+https://github.com/rust-lang/crates.io-index"
20832083
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
20842084

2085+
[[package]]
2086+
name = "literal-escaper"
2087+
version = "0.0.0"
2088+
dependencies = [
2089+
"rustc-std-workspace-std 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
2090+
]
2091+
20852092
[[package]]
20862093
name = "lld-wrapper"
20872094
version = "0.1.0"
@@ -3148,6 +3155,12 @@ version = "1.0.1"
31483155
name = "rustc-std-workspace-std"
31493156
version = "1.0.1"
31503157

3158+
[[package]]
3159+
name = "rustc-std-workspace-std"
3160+
version = "1.0.1"
3161+
source = "registry+https://github.com/rust-lang/crates.io-index"
3162+
checksum = "aba676a20abe46e5b0f1b0deae474aaaf31407e6c71147159890574599da04ef"
3163+
31513164
[[package]]
31523165
name = "rustc_abi"
31533166
version = "0.0.0"
@@ -3186,6 +3199,7 @@ name = "rustc_ast"
31863199
version = "0.0.0"
31873200
dependencies = [
31883201
"bitflags",
3202+
"literal-escaper",
31893203
"memchr",
31903204
"rustc_ast_ir",
31913205
"rustc_data_structures",
@@ -3895,6 +3909,7 @@ name = "rustc_lexer"
38953909
version = "0.0.0"
38963910
dependencies = [
38973911
"expect-test",
3912+
"literal-escaper",
38983913
"memchr",
38993914
"unicode-properties",
39003915
"unicode-xid",
@@ -4157,6 +4172,7 @@ name = "rustc_parse"
41574172
version = "0.0.0"
41584173
dependencies = [
41594174
"bitflags",
4175+
"literal-escaper",
41604176
"rustc_ast",
41614177
"rustc_ast_pretty",
41624178
"rustc_data_structures",
@@ -4179,6 +4195,7 @@ dependencies = [
41794195
name = "rustc_parse_format"
41804196
version = "0.0.0"
41814197
dependencies = [
4198+
"literal-escaper",
41824199
"rustc_index",
41834200
"rustc_lexer",
41844201
]

‎compiler/rustc_ast/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ edition = "2024"
66
[dependencies]
77
# tidy-alphabetical-start
88
bitflags = "2.4.1"
9+
literal-escaper = { path = "../../library/literal-escaper" }
910
memchr = "2.7.4"
1011
rustc_ast_ir = { path = "../rustc_ast_ir" }
1112
rustc_data_structures = { path = "../rustc_data_structures" }

‎compiler/rustc_ast/src/util/literal.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
use std::{ascii, fmt, str};
44

5-
use rustc_lexer::unescape::{
5+
use literal_escaper::{
66
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
77
};
88
use rustc_span::{Span, Symbol, kw, sym};

‎compiler/rustc_lexer/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Rust lexer used by rustc. No stability guarantees are provided.
1616
[dependencies]
1717
memchr = "2.7.4"
1818
unicode-xid = "0.2.0"
19+
literal-escaper = { path = "../../library/literal-escaper" }
1920

2021
[dependencies.unicode-properties]
2122
version = "0.1.0"

‎compiler/rustc_lexer/src/lib.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@
2626
// tidy-alphabetical-end
2727

2828
mod cursor;
29-
pub mod unescape;
3029

3130
#[cfg(test)]
3231
mod tests;
3332

33+
// FIXME: This is needed for rust-analyzer. Remove this dependency once rust-analyzer uses
34+
// `literal-escaper`.
35+
pub use literal_escaper as unescape;
3436
use unicode_properties::UnicodeEmoji;
3537
pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION;
3638

‎compiler/rustc_parse/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ edition = "2024"
66
[dependencies]
77
# tidy-alphabetical-start
88
bitflags = "2.4.1"
9+
literal-escaper = { path = "../../library/literal-escaper" }
910
rustc_ast = { path = "../rustc_ast" }
1011
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
1112
rustc_data_structures = { path = "../rustc_data_structures" }

‎compiler/rustc_parse/src/lexer/mod.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
use std::ops::Range;
22

3+
use literal_escaper::{self, EscapeError, Mode};
34
use rustc_ast::ast::{self, AttrStyle};
45
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
56
use rustc_ast::tokenstream::TokenStream;
67
use rustc_ast::util::unicode::contains_text_flow_control_chars;
78
use rustc_errors::codes::*;
89
use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
9-
use rustc_lexer::unescape::{self, EscapeError, Mode};
1010
use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
1111
use rustc_session::lint::BuiltinLintDiag;
1212
use rustc_session::lint::builtin::{
@@ -970,7 +970,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
970970
postfix_len: u32,
971971
) -> (token::LitKind, Symbol) {
972972
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
973-
unescape::unescape_unicode(src, mode, &mut |span, result| {
973+
literal_escaper::unescape_unicode(src, mode, &mut |span, result| {
974974
callback(span, result.map(drop))
975975
})
976976
})
@@ -986,7 +986,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
986986
postfix_len: u32,
987987
) -> (token::LitKind, Symbol) {
988988
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
989-
unescape::unescape_mixed(src, mode, &mut |span, result| {
989+
literal_escaper::unescape_mixed(src, mode, &mut |span, result| {
990990
callback(span, result.map(drop))
991991
})
992992
})

‎compiler/rustc_parse/src/lexer/unescape_error_reporting.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
use std::iter::once;
44
use std::ops::Range;
55

6+
use literal_escaper::{EscapeError, Mode};
67
use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed};
7-
use rustc_lexer::unescape::{EscapeError, Mode};
88
use rustc_span::{BytePos, Span};
99
use tracing::debug;
1010

‎compiler/rustc_parse/src/parser/expr.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use core::ops::{Bound, ControlFlow};
66
use ast::mut_visit::{self, MutVisitor};
77
use ast::token::{IdentIsRaw, MetaVarKind};
88
use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered};
9+
use literal_escaper::unescape_char;
910
use rustc_ast::ptr::P;
1011
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
1112
use rustc_ast::tokenstream::TokenTree;
@@ -21,7 +22,6 @@ use rustc_ast::{
2122
use rustc_ast_pretty::pprust;
2223
use rustc_data_structures::stack::ensure_sufficient_stack;
2324
use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic};
24-
use rustc_lexer::unescape::unescape_char;
2525
use rustc_macros::Subdiagnostic;
2626
use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error};
2727
use rustc_session::lint::BuiltinLintDiag;

‎compiler/rustc_parse_format/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ edition = "2024"
55

66
[dependencies]
77
# tidy-alphabetical-start
8+
literal-escaper = { path = "../../library/literal-escaper" }
89
rustc_index = { path = "../rustc_index", default-features = false }
910
rustc_lexer = { path = "../rustc_lexer" }
1011
# tidy-alphabetical-end

‎compiler/rustc_parse_format/src/lib.rs

+6-5
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
pub use Alignment::*;
1919
pub use Count::*;
2020
pub use Position::*;
21-
use rustc_lexer::unescape;
2221

2322
// Note: copied from rustc_span
2423
/// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
@@ -1094,12 +1093,14 @@ fn find_width_map_from_snippet(
10941093
fn unescape_string(string: &str) -> Option<String> {
10951094
let mut buf = String::new();
10961095
let mut ok = true;
1097-
unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| {
1098-
match unescaped_char {
1096+
literal_escaper::unescape_unicode(
1097+
string,
1098+
literal_escaper::Mode::Str,
1099+
&mut |_, unescaped_char| match unescaped_char {
10991100
Ok(c) => buf.push(c),
11001101
Err(_) => ok = false,
1101-
}
1102-
});
1102+
},
1103+
);
11031104

11041105
ok.then_some(buf)
11051106
}

‎library/Cargo.lock

+8
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,13 @@ dependencies = [
165165
"rustc-std-workspace-core",
166166
]
167167

168+
[[package]]
169+
name = "literal-escaper"
170+
version = "0.0.0"
171+
dependencies = [
172+
"rustc-std-workspace-std",
173+
]
174+
168175
[[package]]
169176
name = "memchr"
170177
version = "2.7.4"
@@ -236,6 +243,7 @@ name = "proc_macro"
236243
version = "0.0.0"
237244
dependencies = [
238245
"core",
246+
"literal-escaper",
239247
"std",
240248
]
241249

‎library/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ members = [
88
]
99

1010
exclude = [
11+
"literal-escaper",
1112
# stdarch has its own Cargo workspace
1213
"stdarch",
1314
"windows_targets"

‎library/literal-escaper/Cargo.toml

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[package]
2+
name = "literal-escaper"
3+
version = "0.0.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
std = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-std' }
8+
9+
[features]
10+
rustc-dep-of-std = ["dep:std"]

‎library/literal-escaper/README.md

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# literal-escaper
2+
3+
This crate provides code to unescape string literals. It is used by `rustc_lexer`
4+
and `proc_macro`.
File renamed without changes.

‎library/proc_macro/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ version = "0.0.0"
44
edition = "2024"
55

66
[dependencies]
7+
literal-escaper = { path = "../literal-escaper", features = ["rustc-dep-of-std"] }
78
std = { path = "../std" }
89
# Workaround: when documenting this crate rustdoc will try to load crate named
910
# `core` when resolving doc links. Without this line a different `core` will be

‎library/proc_macro/src/lib.rs

+115
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#![feature(restricted_std)]
2929
#![feature(rustc_attrs)]
3030
#![feature(extend_one)]
31+
#![feature(stmt_expr_attributes)]
3132
#![recursion_limit = "256"]
3233
#![allow(internal_features)]
3334
#![deny(ffi_unwind_calls)]
@@ -51,11 +52,24 @@ use std::{error, fmt};
5152

5253
#[unstable(feature = "proc_macro_diagnostic", issue = "54140")]
5354
pub use diagnostic::{Diagnostic, Level, MultiSpan};
55+
#[unstable(feature = "proc_macro_value", issue = "136652")]
56+
pub use literal_escaper::EscapeError;
57+
use literal_escaper::{MixedUnit, Mode, byte_from_char, unescape_mixed, unescape_unicode};
5458
#[unstable(feature = "proc_macro_totokens", issue = "130977")]
5559
pub use to_tokens::ToTokens;
5660

5761
use crate::escape::{EscapeOptions, escape_bytes};
5862

63+
/// Errors returned when trying to retrieve a literal unescaped value.
64+
#[unstable(feature = "proc_macro_value", issue = "136652")]
65+
#[derive(Debug, PartialEq, Eq)]
66+
pub enum ConversionErrorKind {
67+
/// The literal failed to be unescaped; see [`EscapeError`] for more information.
68+
FailedToUnescape(EscapeError),
69+
/// Trying to convert a literal with the wrong type.
70+
InvalidLiteralKind,
71+
}
72+
5973
/// Determines whether proc_macro has been made accessible to the currently
6074
/// running program.
6175
///
@@ -1451,6 +1465,107 @@ impl Literal {
14511465
}
14521466
})
14531467
}
1468+
1469+
/// Returns the unescaped string value if the current literal is a string or a raw string literal.
1470+
#[unstable(feature = "proc_macro_value", issue = "136652")]
1471+
pub fn str_value(&self) -> Result<String, ConversionErrorKind> {
1472+
self.0.symbol.with(|symbol| match self.0.kind {
1473+
bridge::LitKind::Str => {
1474+
if symbol.contains('\\') {
1475+
let mut buf = String::with_capacity(symbol.len());
1476+
let mut error = None;
1477+
// Force-inlining here is aggressive but the closure is
1478+
// called on every char in the string, so it can be hot in
1479+
// programs with many long strings containing escapes.
1480+
unescape_unicode(
1481+
symbol,
1482+
Mode::Str,
1483+
&mut #[inline(always)]
1484+
|_, c| match c {
1485+
Ok(c) => buf.push(c),
1486+
Err(err) => {
1487+
if err.is_fatal() {
1488+
error = Some(ConversionErrorKind::FailedToUnescape(err));
1489+
}
1490+
}
1491+
},
1492+
);
1493+
if let Some(error) = error { Err(error) } else { Ok(buf) }
1494+
} else {
1495+
Ok(symbol.to_string())
1496+
}
1497+
}
1498+
bridge::LitKind::StrRaw(_) => Ok(symbol.to_string()),
1499+
_ => Err(ConversionErrorKind::InvalidLiteralKind),
1500+
})
1501+
}
1502+
1503+
/// Returns the unescaped string value if the current literal is a c-string or a c-string
1504+
/// literal.
1505+
#[unstable(feature = "proc_macro_value", issue = "136652")]
1506+
pub fn cstr_value(&self) -> Result<Vec<u8>, ConversionErrorKind> {
1507+
self.0.symbol.with(|symbol| match self.0.kind {
1508+
bridge::LitKind::CStr => {
1509+
let mut error = None;
1510+
let mut buf = Vec::with_capacity(symbol.len());
1511+
1512+
unescape_mixed(symbol, Mode::CStr, &mut |_span, c| match c {
1513+
Ok(MixedUnit::Char(c)) => {
1514+
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
1515+
}
1516+
Ok(MixedUnit::HighByte(b)) => buf.push(b),
1517+
Err(err) => {
1518+
if err.is_fatal() {
1519+
error = Some(ConversionErrorKind::FailedToUnescape(err));
1520+
}
1521+
}
1522+
});
1523+
if let Some(error) = error {
1524+
Err(error)
1525+
} else {
1526+
buf.push(0);
1527+
Ok(buf)
1528+
}
1529+
}
1530+
bridge::LitKind::CStrRaw(_) => {
1531+
// Raw strings have no escapes so we can convert the symbol
1532+
// directly to a `Vec<u8>` after appending the terminating NUL
1533+
// char.
1534+
let mut buf = symbol.to_owned().into_bytes();
1535+
buf.push(0);
1536+
Ok(buf)
1537+
}
1538+
_ => Err(ConversionErrorKind::InvalidLiteralKind),
1539+
})
1540+
}
1541+
1542+
/// Returns the unescaped string value if the current literal is a byte string or a byte string
1543+
/// literal.
1544+
#[unstable(feature = "proc_macro_value", issue = "136652")]
1545+
pub fn byte_str_value(&self) -> Result<Vec<u8>, ConversionErrorKind> {
1546+
self.0.symbol.with(|symbol| match self.0.kind {
1547+
bridge::LitKind::ByteStr => {
1548+
let mut buf = Vec::with_capacity(symbol.len());
1549+
let mut error = None;
1550+
1551+
unescape_unicode(symbol, Mode::ByteStr, &mut |_, c| match c {
1552+
Ok(c) => buf.push(byte_from_char(c)),
1553+
Err(err) => {
1554+
if err.is_fatal() {
1555+
error = Some(ConversionErrorKind::FailedToUnescape(err));
1556+
}
1557+
}
1558+
});
1559+
if let Some(error) = error { Err(error) } else { Ok(buf) }
1560+
}
1561+
bridge::LitKind::ByteStrRaw(_) => {
1562+
// Raw strings have no escapes so we can convert the symbol
1563+
// directly to a `Vec<u8>`.
1564+
Ok(symbol.to_owned().into_bytes())
1565+
}
1566+
_ => Err(ConversionErrorKind::InvalidLiteralKind),
1567+
})
1568+
}
14541569
}
14551570

14561571
/// Parse a single literal from its stringified representation.
There was a problem loading the remainder of the diff.

0 commit comments

Comments
 (0)
Failed to load comments.