Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit a365890

Browse files
committed May 24, 2024
Auto merge of rust-lang#121150 - Swatinem:debug-ascii-str, r=joboet
Add a fast-path to `Debug` ASCII `&str`. Instead of going through the `EscapeDebug` machinery, we can just skip over ASCII chars that don’t need any escaping. --- This is an alternative / a companion to rust-lang#121138. The other PR adds the fast path deep within `EscapeDebug`, whereas this one skips as early as possible.
2 parents 99a4928 + 05754b8 commit a365890

File tree

2 files changed

+43
-20
lines changed

2 files changed

+43
-20
lines changed
 

‎core/benches/str/debug.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ fn ascii_escapes(b: &mut Bencher) {
4444
assert_fmt(
4545
s,
4646
r#""some\tmore\tascii\ttext\nthis time with some \"escapes\", also 64 byte""#,
47-
21,
47+
15,
4848
);
4949
b.iter(|| {
5050
black_box(format!("{:?}", black_box(s)));
@@ -72,7 +72,7 @@ fn mostly_unicode(b: &mut Bencher) {
7272
#[bench]
7373
fn mixed(b: &mut Bencher) {
7474
let s = "\"❤️\"\n\"hűha ez betű\"\n\"кириллических букв\".";
75-
assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 36);
75+
assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 21);
7676
b.iter(|| {
7777
black_box(format!("{:?}", black_box(s)));
7878
});

‎core/src/fmt/mod.rs

+41-18
Original file line number | Diff line number | Diff line change
@@ -2402,23 +2402,47 @@ impl Display for bool {
24022402
impl Debug for str {
24032403
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
24042404
f.write_char('"')?;
2405-
let mut from = 0;
2406-
for (i, c) in self.char_indices() {
2407-
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
2408-
escape_grapheme_extended: true,
2409-
escape_single_quote: false,
2410-
escape_double_quote: true,
2411-
});
2412-
// If char needs escaping, flush backlog so far and write, else skip
2413-
if esc.len() != 1 {
2414-
f.write_str(&self[from..i])?;
2415-
for c in esc {
2416-
f.write_char(c)?;
2405+
2406+
// substring we know is printable
2407+
let mut printable_range = 0..0;
2408+
2409+
fn needs_escape(b: u8) -> bool {
2410+
b > 0x7E || b < 0x20 || b == b'\\' || b == b'"'
2411+
}
2412+
2413+
// the loop here first skips over runs of printable ASCII as a fast path.
2414+
// other chars (unicode, or ASCII that needs escaping) are then handled per-`char`.
2415+
let mut rest = self;
2416+
while rest.len() > 0 {
2417+
let Some(non_printable_start) = rest.as_bytes().iter().position(|&b| needs_escape(b))
2418+
else {
2419+
printable_range.end += rest.len();
2420+
break;
2421+
};
2422+
2423+
printable_range.end += non_printable_start;
2424+
// SAFETY: the position was derived from an iterator, so is known to be within bounds, and at a char boundary
2425+
rest = unsafe { rest.get_unchecked(non_printable_start..) };
2426+
2427+
let mut chars = rest.chars();
2428+
if let Some(c) = chars.next() {
2429+
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
2430+
escape_grapheme_extended: true,
2431+
escape_single_quote: false,
2432+
escape_double_quote: true,
2433+
});
2434+
if esc.len() != 1 {
2435+
f.write_str(&self[printable_range.clone()])?;
2436+
Display::fmt(&esc, f)?;
2437+
printable_range.start = printable_range.end + c.len_utf8();
24172438
}
2418-
from = i + c.len_utf8();
2439+
printable_range.end += c.len_utf8();
24192440
}
2441+
rest = chars.as_str();
24202442
}
2421-
f.write_str(&self[from..])?;
2443+
2444+
f.write_str(&self[printable_range])?;
2445+
24222446
f.write_char('"')
24232447
}
24242448
}
@@ -2434,13 +2458,12 @@ impl Display for str {
24342458
impl Debug for char {
24352459
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
24362460
f.write_char('\'')?;
2437-
for c in self.escape_debug_ext(EscapeDebugExtArgs {
2461+
let esc = self.escape_debug_ext(EscapeDebugExtArgs {
24382462
escape_grapheme_extended: true,
24392463
escape_single_quote: true,
24402464
escape_double_quote: false,
2441-
}) {
2442-
f.write_char(c)?
2443-
}
2465+
});
2466+
Display::fmt(&esc, f)?;
24442467
f.write_char('\'')
24452468
}
24462469
}

0 commit comments

Comments
 (0)
Failed to load comments.