Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add *_value methods to proc_macro lib #136355

Merged
Merged
17 changes: 17 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
@@ -2151,6 +2151,13 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"

[[package]]
name = "literal-escaper"
version = "0.0.0"
dependencies = [
"rustc-std-workspace-std 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "lld-wrapper"
version = "0.1.0"
@@ -3328,6 +3335,12 @@ version = "1.0.1"
name = "rustc-std-workspace-std"
version = "1.0.1"

[[package]]
name = "rustc-std-workspace-std"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aba676a20abe46e5b0f1b0deae474aaaf31407e6c71147159890574599da04ef"

[[package]]
name = "rustc_abi"
version = "0.0.0"
@@ -3366,6 +3379,7 @@ name = "rustc_ast"
version = "0.0.0"
dependencies = [
"bitflags",
"literal-escaper",
"memchr",
"rustc_ast_ir",
"rustc_data_structures",
@@ -4060,6 +4074,7 @@ name = "rustc_lexer"
version = "0.0.0"
dependencies = [
"expect-test",
"literal-escaper",
"memchr",
"unicode-properties",
"unicode-xid",
@@ -4325,6 +4340,7 @@ name = "rustc_parse"
version = "0.0.0"
dependencies = [
"bitflags",
"literal-escaper",
"rustc_ast",
"rustc_ast_pretty",
"rustc_data_structures",
@@ -4347,6 +4363,7 @@ dependencies = [
name = "rustc_parse_format"
version = "0.0.0"
dependencies = [
"literal-escaper",
"rustc_index",
"rustc_lexer",
]
1 change: 1 addition & 0 deletions compiler/rustc_ast/Cargo.toml
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@ edition = "2021"
[dependencies]
# tidy-alphabetical-start
bitflags = "2.4.1"
literal-escaper = { path = "../../library/literal-escaper" }
memchr = "2.7.4"
rustc_ast_ir = { path = "../rustc_ast_ir" }
rustc_data_structures = { path = "../rustc_data_structures" }
2 changes: 1 addition & 1 deletion compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@

use std::{ascii, fmt, str};

use rustc_lexer::unescape::{
use literal_escaper::{
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
};
use rustc_span::{Span, Symbol, kw, sym};
1 change: 1 addition & 0 deletions compiler/rustc_lexer/Cargo.toml
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@ Rust lexer used by rustc. No stability guarantees are provided.
[dependencies]
memchr = "2.7.4"
unicode-xid = "0.2.0"
literal-escaper = { path = "../../library/literal-escaper" }

[dependencies.unicode-properties]
version = "0.1.0"
4 changes: 3 additions & 1 deletion compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
@@ -27,11 +27,13 @@
// tidy-alphabetical-end

mod cursor;
pub mod unescape;

#[cfg(test)]
mod tests;

// FIXME: This is needed for rust-analyzer. Remove this dependency once rust-analyzer uses
// `literal-escaper`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a sub-tree so you can just make the changes directly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now it's tricky because it requires to make literal_escaper a dependency of r-a. I'll check with r-a people directly after this is merged.

Copy link
Member

@Veykril Veykril Mar 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will break our auto publishing of rustc_lexer and I don't immediately see how to prevent that :/ That means we won't be able to sync our subtree after this change. Or well, I guess we just can't bump this crate until literal_escaper ships on stable. So maybe that's fine as long as no relevant changes happen to rustc_lexer.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No wait, this will break rustc_lexer and rustc_parse_format_args in general if I see this right? Those crates need to compile on stable which they no longer will by depending on this new unstable standard library crate.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, so for the time being, I leave it as is. Let's come up with a solution later on. :)

pub use literal_escaper as unescape;
use unicode_properties::UnicodeEmoji;
pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION;

1 change: 1 addition & 0 deletions compiler/rustc_parse/Cargo.toml
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@ edition = "2021"
[dependencies]
# tidy-alphabetical-start
bitflags = "2.4.1"
literal-escaper = { path = "../../library/literal-escaper" }
rustc_ast = { path = "../rustc_ast" }
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
rustc_data_structures = { path = "../rustc_data_structures" }
6 changes: 3 additions & 3 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use std::ops::Range;

use literal_escaper::{self, EscapeError, Mode};
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
use rustc_errors::codes::*;
use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
use rustc_lexer::unescape::{self, EscapeError, Mode};
use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
use rustc_session::lint::BuiltinLintDiag;
use rustc_session::lint::builtin::{
@@ -970,7 +970,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
postfix_len: u32,
) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
unescape::unescape_unicode(src, mode, &mut |span, result| {
literal_escaper::unescape_unicode(src, mode, &mut |span, result| {
callback(span, result.map(drop))
})
})
@@ -986,7 +986,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
postfix_len: u32,
) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
unescape::unescape_mixed(src, mode, &mut |span, result| {
literal_escaper::unescape_mixed(src, mode, &mut |span, result| {
callback(span, result.map(drop))
})
})
2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Original file line number Diff line number Diff line change
@@ -3,8 +3,8 @@
use std::iter::once;
use std::ops::Range;

use literal_escaper::{EscapeError, Mode};
use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed};
use rustc_lexer::unescape::{EscapeError, Mode};
use rustc_span::{BytePos, Span};
use tracing::debug;

2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/parser/expr.rs
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@ use core::ops::{Bound, ControlFlow};
use ast::mut_visit::{self, MutVisitor};
use ast::token::IdentIsRaw;
use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered};
use literal_escaper::unescape_char;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::TokenTree;
@@ -21,7 +22,6 @@ use rustc_ast::{
use rustc_ast_pretty::pprust;
use rustc_data_structures::stack::ensure_sufficient_stack;
use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic};
use rustc_lexer::unescape::unescape_char;
use rustc_macros::Subdiagnostic;
use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error};
use rustc_session::lint::BuiltinLintDiag;
1 change: 1 addition & 0 deletions compiler/rustc_parse_format/Cargo.toml
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@ edition = "2021"

[dependencies]
# tidy-alphabetical-start
literal-escaper = { path = "../../library/literal-escaper" }
rustc_index = { path = "../rustc_index", default-features = false }
rustc_lexer = { path = "../rustc_lexer" }
# tidy-alphabetical-end
11 changes: 6 additions & 5 deletions compiler/rustc_parse_format/src/lib.rs
Original file line number Diff line number Diff line change
@@ -19,7 +19,6 @@
pub use Alignment::*;
pub use Count::*;
pub use Position::*;
use rustc_lexer::unescape;

// Note: copied from rustc_span
/// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
@@ -1095,12 +1094,14 @@ fn find_width_map_from_snippet(
fn unescape_string(string: &str) -> Option<String> {
let mut buf = String::new();
let mut ok = true;
unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| {
match unescaped_char {
literal_escaper::unescape_unicode(
string,
literal_escaper::Mode::Str,
&mut |_, unescaped_char| match unescaped_char {
Ok(c) => buf.push(c),
Err(_) => ok = false,
}
});
},
);

ok.then_some(buf)
}
8 changes: 8 additions & 0 deletions library/Cargo.lock
Original file line number Diff line number Diff line change
@@ -158,6 +158,13 @@ dependencies = [
"rustc-std-workspace-core",
]

[[package]]
name = "literal-escaper"
version = "0.0.0"
dependencies = [
"rustc-std-workspace-std",
]

[[package]]
name = "memchr"
version = "2.7.4"
@@ -220,6 +227,7 @@ name = "proc_macro"
version = "0.0.0"
dependencies = [
"core",
"literal-escaper",
"std",
]

1 change: 1 addition & 0 deletions library/Cargo.toml
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ members = [
]

exclude = [
"literal-escaper",
# stdarch has its own Cargo workspace
"stdarch",
"windows_targets"
10 changes: 10 additions & 0 deletions library/literal-escaper/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[package]
name = "literal-escaper"
version = "0.0.0"
edition = "2021"

[dependencies]
std = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-std' }

[features]
rustc-dep-of-std = ["dep:std"]
4 changes: 4 additions & 0 deletions library/literal-escaper/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# literal-escaper

This crate provides code to unescape string literals. It is used by `rustc_lexer`
and `proc_macro`.
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions library/proc_macro/Cargo.toml
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@ version = "0.0.0"
edition = "2021"

[dependencies]
literal-escaper = { path = "../literal-escaper", features = ["rustc-dep-of-std"] }
std = { path = "../std" }
# Workaround: when documenting this crate rustdoc will try to load crate named
# `core` when resolving doc links. Without this line a different `core` will be
115 changes: 115 additions & 0 deletions library/proc_macro/src/lib.rs
Original file line number Diff line number Diff line change
@@ -28,6 +28,7 @@
#![feature(restricted_std)]
#![feature(rustc_attrs)]
#![feature(extend_one)]
#![feature(stmt_expr_attributes)]
#![recursion_limit = "256"]
#![allow(internal_features)]
#![deny(ffi_unwind_calls)]
@@ -50,11 +51,24 @@ use std::{error, fmt};

#[unstable(feature = "proc_macro_diagnostic", issue = "54140")]
pub use diagnostic::{Diagnostic, Level, MultiSpan};
#[unstable(feature = "proc_macro_value", issue = "136652")]
pub use literal_escaper::EscapeError;
use literal_escaper::{MixedUnit, Mode, byte_from_char, unescape_mixed, unescape_unicode};
#[unstable(feature = "proc_macro_totokens", issue = "130977")]
pub use to_tokens::ToTokens;

use crate::escape::{EscapeOptions, escape_bytes};

/// Errors returned when trying to retrieve a literal unescaped value.
#[unstable(feature = "proc_macro_value", issue = "136652")]
#[derive(Debug, PartialEq, Eq)]
pub enum ConversionErrorKind {
/// The literal failed to be escaped, take a look at [`EscapeError`] for more information.
FailedToUnescape(EscapeError),
/// Trying to convert a literal with the wrong type.
InvalidLiteralKind,
}

/// Determines whether proc_macro has been made accessible to the currently
/// running program.
///
@@ -1450,6 +1464,107 @@ impl Literal {
}
})
}

/// Returns the unescaped string value if the current literal is a string or a string literal.
#[unstable(feature = "proc_macro_value", issue = "136652")]
pub fn str_value(&self) -> Result<String, ConversionErrorKind> {
self.0.symbol.with(|symbol| match self.0.kind {
bridge::LitKind::Str => {
if symbol.contains('\\') {
let mut buf = String::with_capacity(symbol.len());
let mut error = None;
// Force-inlining here is aggressive but the closure is
// called on every char in the string, so it can be hot in
// programs with many long strings containing escapes.
unescape_unicode(
symbol,
Mode::Str,
&mut #[inline(always)]
|_, c| match c {
Ok(c) => buf.push(c),
Err(err) => {
if err.is_fatal() {
error = Some(ConversionErrorKind::FailedToUnescape(err));
}
}
},
);
if let Some(error) = error { Err(error) } else { Ok(buf) }
} else {
Ok(symbol.to_string())
}
}
bridge::LitKind::StrRaw(_) => Ok(symbol.to_string()),
_ => Err(ConversionErrorKind::InvalidLiteralKind),
})
}

/// Returns the unescaped string value if the current literal is a c-string or a c-string
/// literal.
#[unstable(feature = "proc_macro_value", issue = "136652")]
pub fn cstr_value(&self) -> Result<Vec<u8>, ConversionErrorKind> {
self.0.symbol.with(|symbol| match self.0.kind {
bridge::LitKind::CStr => {
let mut error = None;
let mut buf = Vec::with_capacity(symbol.len());

unescape_mixed(symbol, Mode::CStr, &mut |_span, c| match c {
Ok(MixedUnit::Char(c)) => {
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
}
Ok(MixedUnit::HighByte(b)) => buf.push(b),
Err(err) => {
if err.is_fatal() {
error = Some(ConversionErrorKind::FailedToUnescape(err));
}
}
});
if let Some(error) = error {
Err(error)
} else {
buf.push(0);
Ok(buf)
}
}
bridge::LitKind::CStrRaw(_) => {
// Raw strings have no escapes so we can convert the symbol
// directly to a `Lrc<u8>` after appending the terminating NUL
// char.
let mut buf = symbol.to_owned().into_bytes();
buf.push(0);
Ok(buf)
}
_ => Err(ConversionErrorKind::InvalidLiteralKind),
})
}

/// Returns the unescaped string value if the current literal is a byte string or a byte string
/// literal.
#[unstable(feature = "proc_macro_value", issue = "136652")]
pub fn byte_str_value(&self) -> Result<Vec<u8>, ConversionErrorKind> {
self.0.symbol.with(|symbol| match self.0.kind {
bridge::LitKind::ByteStr => {
let mut buf = Vec::with_capacity(symbol.len());
let mut error = None;

unescape_unicode(symbol, Mode::ByteStr, &mut |_, c| match c {
Ok(c) => buf.push(byte_from_char(c)),
Err(err) => {
if err.is_fatal() {
error = Some(ConversionErrorKind::FailedToUnescape(err));
}
}
});
if let Some(error) = error { Err(error) } else { Ok(buf) }
}
bridge::LitKind::ByteStrRaw(_) => {
// Raw strings have no escapes so we can convert the symbol
// directly to a `Lrc<u8>`.
Ok(symbol.to_owned().into_bytes())
}
_ => Err(ConversionErrorKind::InvalidLiteralKind),
})
}
}

/// Parse a single literal from its stringified representation.
Loading
Loading