add MAX_LEN_UTF8 and MAX_LEN_UTF16 constants

HTGAzureX1212 · HTGAzureX1212 · commit 7786d80d0256 · 2025-02-16T19:14:04.000+08:00
diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs
@@ -105,6 +105,7 @@
 #![feature(box_uninit_write)]
 #![feature(bstr)]
 #![feature(bstr_internals)]
+#![feature(char_max_len)]
 #![feature(clone_to_uninit)]
 #![feature(coerce_unsized)]
 #![feature(const_eval_select)]
diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
@@ -1419,7 +1419,9 @@ impl String {
     pub fn push(&mut self, ch: char) {
         match ch.len_utf8() {
             1 => self.vec.push(ch as u8),
-            _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
+            _ => {
+                self.vec.extend_from_slice(ch.encode_utf8(&mut [0; char::MAX_LEN_UTF8]).as_bytes())
+            }
         }
     }
 
@@ -1716,7 +1718,7 @@ impl String {
     #[rustc_confusables("set")]
     pub fn insert(&mut self, idx: usize, ch: char) {
         assert!(self.is_char_boundary(idx));
-        let mut bits = [0; 4];
+        let mut bits = [0; char::MAX_LEN_UTF8];
         let bits = ch.encode_utf8(&mut bits).as_bytes();
 
         unsafe {
@@ -2771,7 +2773,7 @@ impl SpecToString for core::ascii::Char {
 impl SpecToString for char {
     #[inline]
     fn spec_to_string(&self) -> String {
-        String::from(self.encode_utf8(&mut [0; 4]))
+        String::from(self.encode_utf8(&mut [0; char::MAX_LEN_UTF8]))
     }
 }
 
diff --git a/library/alloc/tests/lib.rs b/library/alloc/tests/lib.rs
@@ -3,6 +3,7 @@
 #![feature(iter_array_chunks)]
 #![feature(assert_matches)]
 #![feature(btree_extract_if)]
+#![feature(char_max_len)]
 #![feature(cow_is_borrowed)]
 #![feature(core_intrinsics)]
 #![feature(downcast_unchecked)]
diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs
@@ -2,6 +2,7 @@
 
 use std::assert_matches::assert_matches;
 use std::borrow::Cow;
+use std::char::MAX_LEN_UTF8;
 use std::cmp::Ordering::{Equal, Greater, Less};
 use std::str::{from_utf8, from_utf8_unchecked};
 
@@ -1231,7 +1232,7 @@ fn test_to_uppercase_rev_iterator() {
 #[test]
 #[cfg_attr(miri, ignore)] // Miri is too slow
 fn test_chars_decoding() {
-    let mut bytes = [0; 4];
+    let mut bytes = [0; MAX_LEN_UTF8];
     for c in (0..0x110000).filter_map(std::char::from_u32) {
         let s = c.encode_utf8(&mut bytes);
         if Some(c) != s.chars().next() {
@@ -1243,7 +1244,7 @@ fn test_chars_decoding() {
 #[test]
 #[cfg_attr(miri, ignore)] // Miri is too slow
 fn test_chars_rev_decoding() {
-    let mut bytes = [0; 4];
+    let mut bytes = [0; MAX_LEN_UTF8];
     for c in (0..0x110000).filter_map(std::char::from_u32) {
         let s = c.encode_utf8(&mut bytes);
         if Some(c) != s.chars().rev().next() {
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
@@ -71,6 +71,16 @@ impl char {
     #[stable(feature = "assoc_char_consts", since = "1.52.0")]
     pub const MAX: char = '\u{10FFFF}';
 
+    /// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
+    /// UTF-8 encoding.
+    #[unstable(feature = "char_max_len", issue = "121714")]
+    pub const MAX_LEN_UTF8: usize = 4;
+
+    /// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
+    /// to UTF-16 encoding.
+    #[unstable(feature = "char_max_len", issue = "121714")]
+    pub const MAX_LEN_UTF16: usize = 2;
+
     /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
     /// decoding error.
     ///
diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs
@@ -95,6 +95,16 @@ const MAX_THREE_B: u32 = 0x10000;
 #[stable(feature = "rust1", since = "1.0.0")]
 pub const MAX: char = char::MAX;
 
+/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
+/// UTF-8 encoding.
+#[unstable(feature = "char_max_len", issue = "121714")]
+pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;
+
+/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
+/// to UTF-16 encoding.
+#[unstable(feature = "char_max_len", issue = "121714")]
+pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;
+
 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
 /// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
 #[stable(feature = "decode_utf16", since = "1.9.0")]
diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs
@@ -3,7 +3,7 @@
 #![stable(feature = "rust1", since = "1.0.0")]
 
 use crate::cell::{Cell, Ref, RefCell, RefMut, SyncUnsafeCell, UnsafeCell};
-use crate::char::EscapeDebugExtArgs;
+use crate::char::{EscapeDebugExtArgs, MAX_LEN_UTF8};
 use crate::marker::PhantomData;
 use crate::num::fmt as numfmt;
 use crate::ops::Deref;
@@ -187,7 +187,7 @@ pub trait Write {
     /// ```
     #[stable(feature = "fmt_write_char", since = "1.1.0")]
     fn write_char(&mut self, c: char) -> Result {
-        self.write_str(c.encode_utf8(&mut [0; 4]))
+        self.write_str(c.encode_utf8(&mut [0; MAX_LEN_UTF8]))
     }
 
     /// Glue for usage of the [`write!`] macro with implementors of this trait.
@@ -2768,7 +2768,7 @@ impl Display for char {
         if f.options.width.is_none() && f.options.precision.is_none() {
             f.write_char(*self)
         } else {
-            f.pad(self.encode_utf8(&mut [0; 4]))
+            f.pad(self.encode_utf8(&mut [0; MAX_LEN_UTF8]))
         }
     }
 }
diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs
@@ -38,6 +38,7 @@
     issue = "27721"
 )]
 
+use crate::char::MAX_LEN_UTF8;
 use crate::cmp::Ordering;
 use crate::convert::TryInto as _;
 use crate::slice::memchr;
@@ -561,8 +562,8 @@ impl Pattern for char {
     type Searcher<'a> = CharSearcher<'a>;
 
     #[inline]
-    fn into_searcher(self, haystack: &str) -> Self::Searcher<'_> {
-        let mut utf8_encoded = [0; 4];
+    fn into_searcher<'a>(self, haystack: &'a str) -> Self::Searcher<'a> {
+        let mut utf8_encoded = [0; MAX_LEN_UTF8];
         let utf8_size = self
             .encode_utf8(&mut utf8_encoded)
             .len()
diff --git a/library/coretests/tests/char.rs b/library/coretests/tests/char.rs
@@ -1,3 +1,4 @@
+use std::char::MAX_LEN_UTF8;
 use std::str::FromStr;
 use std::{char, str};
 
@@ -259,7 +260,7 @@ fn test_escape_unicode() {
 #[test]
 fn test_encode_utf8() {
     fn check(input: char, expect: &[u8]) {
-        let mut buf = [0; 4];
+        let mut buf = [0; MAX_LEN_UTF8];
         let ptr = buf.as_ptr();
         let s = input.encode_utf8(&mut buf);
         assert_eq!(s.as_ptr() as usize, ptr as usize);
diff --git a/library/coretests/tests/lib.rs b/library/coretests/tests/lib.rs
@@ -13,8 +13,22 @@
 #![feature(bigint_helper_methods)]
 #![feature(bstr)]
 #![feature(cell_update)]
+#![feature(char_max_len)]
 #![feature(clone_to_uninit)]
+#![feature(const_align_offset)]
+#![feature(const_align_of_val_raw)]
+#![feature(const_black_box)]
+#![feature(const_caller_location)]
+#![feature(const_cell_into_inner)]
 #![feature(const_eval_select)]
+#![feature(const_hash)]
+#![feature(const_heap)]
+#![feature(const_intrinsic_copy)]
+#![feature(const_maybe_uninit_as_mut_ptr)]
+#![feature(const_nonnull_new)]
+#![feature(const_pointer_is_aligned)]
+#![feature(const_ptr_as_ref)]
+#![feature(const_ptr_write)]
 #![feature(const_swap_nonoverlapping)]
 #![feature(const_trait_impl)]
 #![feature(core_intrinsics)]
diff --git a/library/std/src/fs/tests.rs b/library/std/src/fs/tests.rs
@@ -1,5 +1,6 @@
 use rand::RngCore;
 
+use crate::char::MAX_LEN_UTF8;
 use crate::fs::{self, File, FileTimes, OpenOptions};
 use crate::io::prelude::*;
 use crate::io::{BorrowedBuf, ErrorKind, SeekFrom};
@@ -155,7 +156,7 @@ fn file_test_io_non_positional_read() {
 #[test]
 fn file_test_io_seek_and_tell_smoke_test() {
     let message = "ten-four";
-    let mut read_mem = [0; 4];
+    let mut read_mem = [0; MAX_LEN_UTF8];
     let set_cursor = 4 as u64;
     let tell_pos_pre_read;
     let tell_pos_post_read;
@@ -356,7 +357,7 @@ fn file_test_io_seek_shakedown() {
     let chunk_one: &str = "qwer";
     let chunk_two: &str = "asdf";
     let chunk_three: &str = "zxcv";
-    let mut read_mem = [0; 4];
+    let mut read_mem = [0; MAX_LEN_UTF8];
     let tmpdir = tmpdir();
     let filename = &tmpdir.join("file_rt_io_file_test_seek_shakedown.txt");
     {
@@ -621,7 +622,7 @@ fn file_test_directoryinfo_readdir() {
         check!(w.write(msg));
     }
     let files = check!(fs::read_dir(dir));
-    let mut mem = [0; 4];
+    let mut mem = [0; MAX_LEN_UTF8];
     for f in files {
         let f = f.unwrap().path();
         {
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
@@ -281,6 +281,7 @@
 #![feature(cfg_sanitizer_cfi)]
 #![feature(cfg_target_thread_local)]
 #![feature(cfi_encoding)]
+#![feature(char_max_len)]
 #![feature(concat_idents)]
 #![feature(decl_macro)]
 #![feature(deprecated_suggestion)]
diff --git a/library/std/src/sys/pal/windows/stdio.rs b/library/std/src/sys/pal/windows/stdio.rs
@@ -1,5 +1,6 @@
 #![unstable(issue = "none", feature = "windows_stdio")]
 
+use core::char::MAX_LEN_UTF8;
 use core::str::utf8_char_width;
 
 use super::api::{self, WinError};
@@ -426,7 +427,7 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
 
 impl IncompleteUtf8 {
     pub const fn new() -> IncompleteUtf8 {
-        IncompleteUtf8 { bytes: [0; 4], len: 0 }
+        IncompleteUtf8 { bytes: [0; MAX_LEN_UTF8], len: 0 }
     }
 }
 
diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs
@@ -18,7 +18,7 @@
 #[cfg(test)]
 mod tests;
 
-use core::char::{encode_utf8_raw, encode_utf16_raw};
+use core::char::{MAX_LEN_UTF8, MAX_LEN_UTF16, encode_utf8_raw, encode_utf16_raw};
 use core::clone::CloneToUninit;
 use core::str::next_code_point;
 
@@ -117,7 +117,7 @@ impl CodePoint {
 
     /// Returns a Unicode scalar value for the code point.
     ///
-    /// Returns `'\u{FFFD}'` (the replacement character “�”)
+    /// Returns `'\u{FFFD}'` (the replacement character “�”)
     /// if the code point is a surrogate (from U+D800 to U+DFFF).
     #[inline]
     pub fn to_char_lossy(&self) -> char {
@@ -240,7 +240,7 @@ impl Wtf8Buf {
     /// Copied from String::push
     /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check.
     fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
-        let mut bytes = [0; 4];
+        let mut bytes = [0; MAX_LEN_UTF8];
         let bytes = encode_utf8_raw(code_point.value, &mut bytes);
         self.bytes.extend_from_slice(bytes)
     }
@@ -438,7 +438,7 @@ impl Wtf8Buf {
     ///
     /// This does not copy the data (but may overwrite parts of it in place).
     ///
-    /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”)
+    /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”)
     pub fn into_string_lossy(mut self) -> String {
         // Fast path: If we already have UTF-8, we can return it immediately.
         if self.is_known_utf8 {
@@ -668,7 +668,7 @@ impl Wtf8 {
     /// Lossily converts the string to UTF-8.
     /// Returns a UTF-8 `&str` slice if the contents are well-formed in UTF-8.
     ///
-    /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”).
+    /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”).
     ///
     /// This only copies the data if necessary (if it contains any surrogate).
     pub fn to_string_lossy(&self) -> Cow<'_, str> {
@@ -1001,7 +1001,7 @@ impl<'a> Iterator for EncodeWide<'a> {
             return Some(tmp);
         }
 
-        let mut buf = [0; 2];
+        let mut buf = [0; MAX_LEN_UTF16];
         self.code_points.next().map(|code_point| {
             let n = encode_utf16_raw(code_point.value, &mut buf).len();
             if n == 2 {

Original file line number	Diff line number	Diff line change
`@@ -1419,7 +1419,9 @@ impl String {`
`1419`	`1419`	`pub fn push(&mut self, ch: char) {`
`1420`	`1420`	`match ch.len_utf8() {`
`1421`	`1421`	`1 => self.vec.push(ch as u8),`
`1422`		`- _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),`
	`1422`	`+ _ => {`
	`1423`	`+ self.vec.extend_from_slice(ch.encode_utf8(&mut [0; char::MAX_LEN_UTF8]).as_bytes())`
	`1424`	`+ }`
`1423`	`1425`	`}`
`1424`	`1426`	`}`
`1425`	`1427`
`@@ -1716,7 +1718,7 @@ impl String {`
`1716`	`1718`	`#[rustc_confusables("set")]`
`1717`	`1719`	`pub fn insert(&mut self, idx: usize, ch: char) {`
`1718`	`1720`	`assert!(self.is_char_boundary(idx));`
`1719`		`- let mut bits = [0; 4];`
	`1721`	`+ let mut bits = [0; char::MAX_LEN_UTF8];`
`1720`	`1722`	`let bits = ch.encode_utf8(&mut bits).as_bytes();`
`1721`	`1723`
`1722`	`1724`	`unsafe {`
`@@ -2771,7 +2773,7 @@ impl SpecToString for core::ascii::Char {`
`2771`	`2773`	`impl SpecToString for char {`
`2772`	`2774`	`#[inline]`
`2773`	`2775`	`fn spec_to_string(&self) -> String {`
`2774`		`- String::from(self.encode_utf8(&mut [0; 4]))`
	`2776`	`+ String::from(self.encode_utf8(&mut [0; char::MAX_LEN_UTF8]))`
`2775`	`2777`	`}`
`2776`	`2778`	`}`
`2777`	`2779`
Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@`
`3`	`3`	`#![stable(feature = "rust1", since = "1.0.0")]`
`4`	`4`
`5`	`5`	`use crate::cell::{Cell, Ref, RefCell, RefMut, SyncUnsafeCell, UnsafeCell};`
`6`		`-use crate::char::EscapeDebugExtArgs;`
	`6`	`+use crate::char::{EscapeDebugExtArgs, MAX_LEN_UTF8};`
`7`	`7`	`use crate::marker::PhantomData;`
`8`	`8`	`use crate::num::fmt as numfmt;`
`9`	`9`	`use crate::ops::Deref;`
`@@ -187,7 +187,7 @@ pub trait Write {`
`187`	`187`	/// ```
`188`	`188`	`#[stable(feature = "fmt_write_char", since = "1.1.0")]`
`189`	`189`	`fn write_char(&mut self, c: char) -> Result {`
`190`		`- self.write_str(c.encode_utf8(&mut [0; 4]))`
	`190`	`+ self.write_str(c.encode_utf8(&mut [0; MAX_LEN_UTF8]))`
`191`	`191`	`}`
`192`	`192`
`193`	`193`	/// Glue for usage of the [`write!`] macro with implementors of this trait.
`@@ -2768,7 +2768,7 @@ impl Display for char {`
`2768`	`2768`	`if f.options.width.is_none() && f.options.precision.is_none() {`
`2769`	`2769`	`f.write_char(*self)`
`2770`	`2770`	`} else {`
`2771`		`- f.pad(self.encode_utf8(&mut [0; 4]))`
	`2771`	`+ f.pad(self.encode_utf8(&mut [0; MAX_LEN_UTF8]))`
`2772`	`2772`	`}`
`2773`	`2773`	`}`
`2774`	`2774`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,6 @@`
`1`	`1`	`#![unstable(issue = "none", feature = "windows_stdio")]`
`2`	`2`
	`3`	`+use core::char::MAX_LEN_UTF8;`
`3`	`4`	`use core::str::utf8_char_width;`
`4`	`5`
`5`	`6`	`use super::api::{self, WinError};`
`@@ -426,7 +427,7 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {`
`426`	`427`
`427`	`428`	`impl IncompleteUtf8 {`
`428`	`429`	`pub const fn new() -> IncompleteUtf8 {`
`429`		`- IncompleteUtf8 { bytes: [0; 4], len: 0 }`
	`430`	`+ IncompleteUtf8 { bytes: [0; MAX_LEN_UTF8], len: 0 }`
`430`	`431`	`}`
`431`	`432`	`}`
`432`	`433`