Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 27136c4

Browse files
committed Sep 23, 2024
Add fast path for ascii to ascii in str::replace
1 parent ba6158c commit 27136c4

File tree

3 files changed

+63
-2
lines changed

3 files changed

+63
-2
lines changed
 

‎alloc/src/str.rs

+24-1
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ pub use core::str::SplitInclusive;
1919
pub use core::str::SplitWhitespace;
2020
#[stable(feature = "rust1", since = "1.0.0")]
2121
pub use core::str::pattern;
22-
use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher};
22+
use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher, Utf8Pattern};
2323
#[stable(feature = "rust1", since = "1.0.0")]
2424
pub use core::str::{Bytes, CharIndices, Chars, from_utf8, from_utf8_mut};
2525
#[stable(feature = "str_escape", since = "1.34.0")]
@@ -268,6 +268,18 @@ impl str {
268268
#[stable(feature = "rust1", since = "1.0.0")]
269269
#[inline]
270270
pub fn replace<P: Pattern>(&self, from: P, to: &str) -> String {
271+
// Fast path for ASCII to ASCII case.
272+
273+
if let Some(from_byte) = match from.as_utf8_pattern() {
274+
Some(Utf8Pattern::StringPattern([from_byte])) => Some(*from_byte),
275+
Some(Utf8Pattern::CharPattern(c)) => c.as_ascii().map(|ascii_char| ascii_char.to_u8()),
276+
_ => None,
277+
} {
278+
if let [to_byte] = to.as_bytes() {
279+
return unsafe { replace_ascii(self.as_bytes(), from_byte, *to_byte) };
280+
}
281+
}
282+
271283
let mut result = String::new();
272284
let mut last_end = 0;
273285
for (start, part) in self.match_indices(from) {
@@ -661,3 +673,14 @@ fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> Vec<u8> {
661673

662674
out
663675
}
676+
#[inline]
677+
#[cfg(not(test))]
678+
#[cfg(not(no_global_oom_handling))]
679+
#[allow(dead_code)]
680+
/// Faster implementation of string replacement for ASCII to ASCII cases.
681+
/// Should produce fast vectorized code.
///
/// Builds a new `String` by copying `utf8_bytes` and substituting `to` for
/// every byte equal to `from`. The output always has the same length as the
/// input because the replacement is byte-for-byte.
///
/// This function is compiled out of test builds (`cfg(not(test))`) and of
/// builds with `no_global_oom_handling`.
///
/// # Safety
///
/// The resulting bytes are handed to `String::from_utf8_unchecked` without
/// validation. The caller must therefore guarantee that `utf8_bytes` is valid
/// UTF-8 and that both `from` and `to` are ASCII bytes, so that swapping one
/// for the other cannot break a multi-byte sequence.
682+
unsafe fn replace_ascii(utf8_bytes: &[u8], from: u8, to: u8) -> String {
683+
let result: Vec<u8> = utf8_bytes.iter().map(|b| if *b == from { to } else { *b }).collect();
684+
// SAFETY: We replaced ascii with ascii on valid utf8 strings.
685+
unsafe { String::from_utf8_unchecked(result) }
686+
}

‎alloc/src/string.rs

+6-1
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ use core::ops::AddAssign;
5353
#[cfg(not(no_global_oom_handling))]
5454
use core::ops::Bound::{Excluded, Included, Unbounded};
5555
use core::ops::{self, Range, RangeBounds};
56-
use core::str::pattern::Pattern;
56+
use core::str::pattern::{Pattern, Utf8Pattern};
5757
use core::{fmt, hash, ptr, slice};
5858

5959
#[cfg(not(no_global_oom_handling))]
@@ -2424,6 +2424,11 @@ impl<'b> Pattern for &'b String {
24242424
{
24252425
self[..].strip_suffix_of(haystack)
24262426
}
2427+
2428+
// Exposes this `&String` pattern as its raw UTF-8 bytes, wrapped in
// `Utf8Pattern::StringPattern`. This implementation never returns `None`.
#[inline]
2429+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
2430+
Some(Utf8Pattern::StringPattern(self.as_bytes()))
2431+
}
24272432
}
24282433

24292434
macro_rules! impl_eq {

‎core/src/str/pattern.rs

+33
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,19 @@ pub trait Pattern: Sized {
160160
None
161161
}
162162
}
163+
164+
/// Returns the pattern as UTF-8 bytes if possible.
///
/// Implementations that can expose their contents return
/// `Some(Utf8Pattern)`; implementations that cannot return `None`.
/// This is a required method: every `Pattern` implementation must define it.
165+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>>;
166+
}
167+
/// Result of calling [`Pattern::as_utf8_pattern()`].
168+
/// Can be used for inspecting the contents of a [`Pattern`] in cases
169+
/// where the underlying representation can be represented as UTF-8.
///
/// The lifetime `'a` is that of the byte slice borrowed by
/// [`Utf8Pattern::StringPattern`].
170+
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
171+
pub enum Utf8Pattern<'a> {
172+
/// The UTF-8 bytes of a string pattern; returned by the `&str` and
/// `&String` implementations of [`Pattern`].
173+
StringPattern(&'a [u8]),
174+
/// A single character pattern, stored by value; returned by the `char`
/// implementation of [`Pattern`].
175+
CharPattern(char),
163176
}
164177

165178
// Searcher
@@ -599,6 +612,11 @@ impl Pattern for char {
599612
{
600613
self.encode_utf8(&mut [0u8; 4]).strip_suffix_of(haystack)
601614
}
615+
616+
// Exposes this `char` pattern by value as `Utf8Pattern::CharPattern`.
// This implementation never returns `None`.
#[inline]
617+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
618+
Some(Utf8Pattern::CharPattern(*self))
619+
}
602620
}
603621

604622
/////////////////////////////////////////////////////////////////////////////
@@ -657,6 +675,11 @@ impl<C: MultiCharEq> Pattern for MultiCharEqPattern<C> {
657675
fn into_searcher(self, haystack: &str) -> MultiCharEqSearcher<'_, C> {
658676
MultiCharEqSearcher { haystack, char_eq: self.0, char_indices: haystack.char_indices() }
659677
}
678+
679+
// Always returns `None`: `MultiCharEqPattern` does not expose a UTF-8 view.
// NOTE(review): presumably because the pattern is a character-matching
// predicate rather than fixed text; confirm against `MultiCharEq`.
#[inline]
680+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
681+
None
682+
}
660683
}
661684

662685
unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
@@ -747,6 +770,11 @@ macro_rules! pattern_methods {
747770
{
748771
($pmap)(self).strip_suffix_of(haystack)
749772
}
773+
774+
// Always returns `None`, so no UTF-8 view is exposed here.
// NOTE(review): this sits inside `pattern_methods!`, so every pattern type
// generated through that macro presumably gets this behavior; confirm.
#[inline]
775+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
776+
None
777+
}
750778
};
751779
}
752780

@@ -1022,6 +1050,11 @@ impl<'b> Pattern for &'b str {
10221050
None
10231051
}
10241052
}
1053+
1054+
// Exposes this `&str` pattern as its raw UTF-8 bytes, wrapped in
// `Utf8Pattern::StringPattern`. This implementation never returns `None`.
#[inline]
1055+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
1056+
Some(Utf8Pattern::StringPattern(self.as_bytes()))
1057+
}
10251058
}
10261059

10271060
/////////////////////////////////////////////////////////////////////////////

0 commit comments

Comments
 (0)
Failed to load comments.