core/char/
methods.rs

1//! impl char {}
2
3use super::*;
4#[cfg(not(feature = "ferrocene_certified"))]
5use crate::panic::const_panic;
6#[cfg(not(feature = "ferrocene_certified"))]
7use crate::slice;
8#[cfg(not(feature = "ferrocene_certified"))]
9use crate::str::from_utf8_unchecked_mut;
10#[cfg(not(feature = "ferrocene_certified"))]
11use crate::ub_checks::assert_unsafe_precondition;
12#[cfg(not(feature = "ferrocene_certified"))]
13use crate::unicode::printable::is_printable;
14#[cfg(not(feature = "ferrocene_certified"))]
15use crate::unicode::{self, conversions};
16
17impl char {
18    /// The lowest valid code point a `char` can have, `'\0'`.
19    ///
20    /// Unlike integer types, `char` actually has a gap in the middle,
21    /// meaning that the range of possible `char`s is smaller than you
22    /// might expect. Ranges of `char` will automatically hop this gap
23    /// for you:
24    ///
25    /// ```
26    /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
27    /// let size = (char::MIN..=char::MAX).count() as u32;
28    /// assert!(size < dist);
29    /// ```
30    ///
31    /// Despite this gap, the `MIN` and [`MAX`] values can be used as bounds for
32    /// all `char` values.
33    ///
34    /// [`MAX`]: char::MAX
35    ///
36    /// # Examples
37    ///
38    /// ```
39    /// # fn something_which_returns_char() -> char { 'a' }
40    /// let c: char = something_which_returns_char();
41    /// assert!(char::MIN <= c);
42    ///
43    /// let value_at_min = u32::from(char::MIN);
44    /// assert_eq!(char::from_u32(value_at_min), Some('\0'));
45    /// ```
46    #[stable(feature = "char_min", since = "1.83.0")]
47    pub const MIN: char = '\0';
48
49    /// The highest valid code point a `char` can have, `'\u{10FFFF}'`.
50    ///
51    /// Unlike integer types, `char` actually has a gap in the middle,
52    /// meaning that the range of possible `char`s is smaller than you
53    /// might expect. Ranges of `char` will automatically hop this gap
54    /// for you:
55    ///
56    /// ```
57    /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
58    /// let size = (char::MIN..=char::MAX).count() as u32;
59    /// assert!(size < dist);
60    /// ```
61    ///
62    /// Despite this gap, the [`MIN`] and `MAX` values can be used as bounds for
63    /// all `char` values.
64    ///
65    /// [`MIN`]: char::MIN
66    ///
67    /// # Examples
68    ///
69    /// ```
70    /// # fn something_which_returns_char() -> char { 'a' }
71    /// let c: char = something_which_returns_char();
72    /// assert!(c <= char::MAX);
73    ///
74    /// let value_at_max = u32::from(char::MAX);
75    /// assert_eq!(char::from_u32(value_at_max), Some('\u{10FFFF}'));
76    /// assert_eq!(char::from_u32(value_at_max + 1), None);
77    /// ```
78    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
79    pub const MAX: char = '\u{10FFFF}';
80
81    /// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
82    /// UTF-8 encoding.
83    #[stable(feature = "char_max_len_assoc", since = "CURRENT_RUSTC_VERSION")]
84    pub const MAX_LEN_UTF8: usize = 4;
85
86    /// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
87    /// to UTF-16 encoding.
88    #[stable(feature = "char_max_len_assoc", since = "CURRENT_RUSTC_VERSION")]
89    pub const MAX_LEN_UTF16: usize = 2;
90
91    /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
92    /// decoding error.
93    ///
94    /// It can occur, for example, when giving ill-formed UTF-8 bytes to
95    /// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy).
96    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
97    pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
98
99    /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
100    /// `char` and `str` methods are based on.
101    ///
102    /// New versions of Unicode are released regularly and subsequently all methods
103    /// in the standard library depending on Unicode are updated. Therefore the
104    /// behavior of some `char` and `str` methods and the value of this constant
105    /// changes over time. This is *not* considered to be a breaking change.
106    ///
107    /// The version numbering scheme is explained in
108    /// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
109    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
110    #[cfg(not(feature = "ferrocene_certified"))]
111    pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
112
113    /// Creates an iterator over the native endian UTF-16 encoded code points in `iter`,
114    /// returning unpaired surrogates as `Err`s.
115    ///
116    /// # Examples
117    ///
118    /// Basic usage:
119    ///
120    /// ```
121    /// // 𝄞mus<invalid>ic<invalid>
122    /// let v = [
123    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
124    /// ];
125    ///
126    /// assert_eq!(
127    ///     char::decode_utf16(v)
128    ///         .map(|r| r.map_err(|e| e.unpaired_surrogate()))
129    ///         .collect::<Vec<_>>(),
130    ///     vec![
131    ///         Ok('𝄞'),
132    ///         Ok('m'), Ok('u'), Ok('s'),
133    ///         Err(0xDD1E),
134    ///         Ok('i'), Ok('c'),
135    ///         Err(0xD834)
136    ///     ]
137    /// );
138    /// ```
139    ///
140    /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
141    ///
142    /// ```
143    /// // 𝄞mus<invalid>ic<invalid>
144    /// let v = [
145    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
146    /// ];
147    ///
148    /// assert_eq!(
149    ///     char::decode_utf16(v)
150    ///        .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER))
151    ///        .collect::<String>(),
152    ///     "𝄞mus�ic�"
153    /// );
154    /// ```
155    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
156    #[inline]
157    #[cfg(not(feature = "ferrocene_certified"))]
158    pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
159        super::decode::decode_utf16(iter)
160    }
161
162    /// Converts a `u32` to a `char`.
163    ///
164    /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
165    /// [`as`](../std/keyword.as.html):
166    ///
167    /// ```
168    /// let c = '💯';
169    /// let i = c as u32;
170    ///
171    /// assert_eq!(128175, i);
172    /// ```
173    ///
174    /// However, the reverse is not true: not all valid [`u32`]s are valid
175    /// `char`s. `from_u32()` will return `None` if the input is not a valid value
176    /// for a `char`.
177    ///
178    /// For an unsafe version of this function which ignores these checks, see
179    /// [`from_u32_unchecked`].
180    ///
181    /// [`from_u32_unchecked`]: #method.from_u32_unchecked
182    ///
183    /// # Examples
184    ///
185    /// Basic usage:
186    ///
187    /// ```
188    /// let c = char::from_u32(0x2764);
189    ///
190    /// assert_eq!(Some('❤'), c);
191    /// ```
192    ///
193    /// Returning `None` when the input is not a valid `char`:
194    ///
195    /// ```
196    /// let c = char::from_u32(0x110000);
197    ///
198    /// assert_eq!(None, c);
199    /// ```
200    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
201    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
202    #[must_use]
203    #[inline]
204    #[cfg(not(feature = "ferrocene_certified"))]
    pub const fn from_u32(i: u32) -> Option<char> {
        // Method-syntax entry point only: the conversion (and whatever validity
        // check decides between `Some` and `None`) is performed by the free
        // function of the same name in the sibling `convert` module.
        super::convert::from_u32(i)
    }
208
209    /// Converts a `u32` to a `char`, ignoring validity.
210    ///
211    /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
212    /// `as`:
213    ///
214    /// ```
215    /// let c = '💯';
216    /// let i = c as u32;
217    ///
218    /// assert_eq!(128175, i);
219    /// ```
220    ///
221    /// However, the reverse is not true: not all valid [`u32`]s are valid
222    /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to
223    /// `char`, possibly creating an invalid one.
224    ///
225    /// # Safety
226    ///
227    /// This function is unsafe, as it may construct invalid `char` values.
228    ///
229    /// For a safe version of this function, see the [`from_u32`] function.
230    ///
231    /// [`from_u32`]: #method.from_u32
232    ///
233    /// # Examples
234    ///
235    /// Basic usage:
236    ///
237    /// ```
238    /// let c = unsafe { char::from_u32_unchecked(0x2764) };
239    ///
240    /// assert_eq!('❤', c);
241    /// ```
242    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
243    #[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
244    #[must_use]
245    #[inline]
    pub const unsafe fn from_u32_unchecked(i: u32) -> char {
        // Forwards to the free function in the sibling `convert` module; nothing
        // is validated at this level, so `i` is passed through untouched.
        // SAFETY: the safety contract must be upheld by the caller.
        unsafe { super::convert::from_u32_unchecked(i) }
    }
250
251    /// Converts a digit in the given radix to a `char`.
252    ///
253    /// A 'radix' here is sometimes also called a 'base'. A radix of two
254    /// indicates a binary number, a radix of ten, decimal, and a radix of
255    /// sixteen, hexadecimal, to give some common values. Arbitrary
256    /// radices are supported.
257    ///
258    /// `from_digit()` will return `None` if the input is not a digit in
259    /// the given radix.
260    ///
261    /// # Panics
262    ///
263    /// Panics if given a radix larger than 36.
264    ///
265    /// # Examples
266    ///
267    /// Basic usage:
268    ///
269    /// ```
270    /// let c = char::from_digit(4, 10);
271    ///
272    /// assert_eq!(Some('4'), c);
273    ///
274    /// // Decimal 11 is a single digit in base 16
275    /// let c = char::from_digit(11, 16);
276    ///
277    /// assert_eq!(Some('b'), c);
278    /// ```
279    ///
280    /// Returning `None` when the input is not a digit:
281    ///
282    /// ```
283    /// let c = char::from_digit(20, 10);
284    ///
285    /// assert_eq!(None, c);
286    /// ```
287    ///
288    /// Passing a large radix, causing a panic:
289    ///
290    /// ```should_panic
291    /// // this panics
292    /// let _c = char::from_digit(1, 37);
293    /// ```
294    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
295    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
296    #[must_use]
297    #[inline]
298    #[cfg(not(feature = "ferrocene_certified"))]
    pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
        // Pure delegation: nothing is asserted here, so the panic for an
        // oversized radix is presumably raised inside `convert::from_digit`
        // (confirm against that function).
        super::convert::from_digit(num, radix)
    }
302
303    /// Checks if a `char` is a digit in the given radix.
304    ///
305    /// A 'radix' here is sometimes also called a 'base'. A radix of two
306    /// indicates a binary number, a radix of ten, decimal, and a radix of
307    /// sixteen, hexadecimal, to give some common values. Arbitrary
308    /// radices are supported.
309    ///
310    /// Compared to [`is_numeric()`], this function only recognizes the characters
311    /// `0-9`, `a-z` and `A-Z`.
312    ///
313    /// 'Digit' is defined to be only the following characters:
314    ///
315    /// * `0-9`
316    /// * `a-z`
317    /// * `A-Z`
318    ///
319    /// For a more comprehensive understanding of 'digit', see [`is_numeric()`].
320    ///
321    /// [`is_numeric()`]: #method.is_numeric
322    ///
323    /// # Panics
324    ///
325    /// Panics if given a radix smaller than 2 or larger than 36.
326    ///
327    /// # Examples
328    ///
329    /// Basic usage:
330    ///
331    /// ```
332    /// assert!('1'.is_digit(10));
333    /// assert!('f'.is_digit(16));
334    /// assert!(!'f'.is_digit(10));
335    /// ```
336    ///
337    /// Passing a large radix, causing a panic:
338    ///
339    /// ```should_panic
340    /// // this panics
341    /// '1'.is_digit(37);
342    /// ```
343    ///
344    /// Passing a small radix, causing a panic:
345    ///
346    /// ```should_panic
347    /// // this panics
348    /// '1'.is_digit(1);
349    /// ```
350    #[stable(feature = "rust1", since = "1.0.0")]
351    #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
352    #[inline]
353    #[cfg(not(feature = "ferrocene_certified"))]
    pub const fn is_digit(self, radix: u32) -> bool {
        // Defined in terms of `to_digit`, so both methods accept exactly the
        // same characters and share its radix assertion (the source of the
        // panics for a radix outside 2..=36).
        self.to_digit(radix).is_some()
    }
357
358    /// Converts a `char` to a digit in the given radix.
359    ///
360    /// A 'radix' here is sometimes also called a 'base'. A radix of two
361    /// indicates a binary number, a radix of ten, decimal, and a radix of
362    /// sixteen, hexadecimal, to give some common values. Arbitrary
363    /// radices are supported.
364    ///
365    /// 'Digit' is defined to be only the following characters:
366    ///
367    /// * `0-9`
368    /// * `a-z`
369    /// * `A-Z`
370    ///
371    /// # Errors
372    ///
373    /// Returns `None` if the `char` does not refer to a digit in the given radix.
374    ///
375    /// # Panics
376    ///
377    /// Panics if given a radix smaller than 2 or larger than 36.
378    ///
379    /// # Examples
380    ///
381    /// Basic usage:
382    ///
383    /// ```
384    /// assert_eq!('1'.to_digit(10), Some(1));
385    /// assert_eq!('f'.to_digit(16), Some(15));
386    /// ```
387    ///
388    /// Passing a non-digit results in failure:
389    ///
390    /// ```
391    /// assert_eq!('f'.to_digit(10), None);
392    /// assert_eq!('z'.to_digit(16), None);
393    /// ```
394    ///
395    /// Passing a large radix, causing a panic:
396    ///
397    /// ```should_panic
398    /// // this panics
399    /// let _ = '1'.to_digit(37);
400    /// ```
    ///
    /// Passing a small radix, causing a panic:
402    ///
403    /// ```should_panic
404    /// // this panics
405    /// let _ = '1'.to_digit(1);
406    /// ```
407    #[stable(feature = "rust1", since = "1.0.0")]
408    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
409    #[rustc_diagnostic_item = "char_to_digit"]
410    #[must_use = "this returns the result of the operation, \
411                  without modifying the original"]
412    #[inline]
413    pub const fn to_digit(self, radix: u32) -> Option<u32> {
414        assert!(
415            radix >= 2 && radix <= 36,
416            "to_digit: invalid radix -- radix must be in the range 2 to 36 inclusive"
417        );
418        // check radix to remove letter handling code when radix is a known constant
419        let value = if self > '9' && radix > 10 {
420            // mask to convert ASCII letters to uppercase
421            const TO_UPPERCASE_MASK: u32 = !0b0010_0000;
422            // Converts an ASCII letter to its corresponding integer value:
423            // A-Z => 10-35, a-z => 10-35. Other characters produce values >= 36.
424            //
425            // Add Overflow Safety:
426            // By applying the mask after the subtraction, the first addendum is
427            // constrained such that it never exceeds u32::MAX - 0x20.
428            ((self as u32).wrapping_sub('A' as u32) & TO_UPPERCASE_MASK) + 10
429        } else {
430            // convert digit to value, non-digits wrap to values > 36
431            (self as u32).wrapping_sub('0' as u32)
432        };
433        // FIXME(const-hack): once then_some is const fn, use it here
434        if value < radix { Some(value) } else { None }
435    }
436
437    /// Returns an iterator that yields the hexadecimal Unicode escape of a
438    /// character as `char`s.
439    ///
440    /// This will escape characters with the Rust syntax of the form
441    /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
442    ///
443    /// # Examples
444    ///
445    /// As an iterator:
446    ///
447    /// ```
448    /// for c in '❤'.escape_unicode() {
449    ///     print!("{c}");
450    /// }
451    /// println!();
452    /// ```
453    ///
454    /// Using `println!` directly:
455    ///
456    /// ```
457    /// println!("{}", '❤'.escape_unicode());
458    /// ```
459    ///
460    /// Both are equivalent to:
461    ///
462    /// ```
463    /// println!("\\u{{2764}}");
464    /// ```
465    ///
466    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
467    ///
468    /// ```
469    /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
470    /// ```
471    #[must_use = "this returns the escaped char as an iterator, \
472                  without modifying the original"]
473    #[stable(feature = "rust1", since = "1.0.0")]
474    #[inline]
475    #[cfg(not(feature = "ferrocene_certified"))]
    pub fn escape_unicode(self) -> EscapeUnicode {
        // The returned `EscapeUnicode` value is built directly from this
        // character; no escaping logic lives in this method itself.
        EscapeUnicode::new(self)
    }
479
480    /// An extended version of `escape_debug` that optionally permits escaping
481    /// Extended Grapheme codepoints, single quotes, and double quotes. This
482    /// allows us to format characters like nonspacing marks better when they're
483    /// at the start of a string, and allows escaping single quotes in
484    /// characters, and double quotes in strings.
485    #[inline]
486    #[cfg(not(feature = "ferrocene_certified"))]
487    pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
488        match self {
489            '\0' => EscapeDebug::backslash(ascii::Char::Digit0),
490            '\t' => EscapeDebug::backslash(ascii::Char::SmallT),
491            '\r' => EscapeDebug::backslash(ascii::Char::SmallR),
492            '\n' => EscapeDebug::backslash(ascii::Char::SmallN),
493            '\\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus),
494            '\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
495            '\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
496            _ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
497                EscapeDebug::unicode(self)
498            }
499            _ if is_printable(self) => EscapeDebug::printable(self),
500            _ => EscapeDebug::unicode(self),
501        }
502    }
503
504    /// Returns an iterator that yields the literal escape code of a character
505    /// as `char`s.
506    ///
507    /// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations
508    /// of `str` or `char`.
509    ///
510    /// # Examples
511    ///
512    /// As an iterator:
513    ///
514    /// ```
515    /// for c in '\n'.escape_debug() {
516    ///     print!("{c}");
517    /// }
518    /// println!();
519    /// ```
520    ///
521    /// Using `println!` directly:
522    ///
523    /// ```
524    /// println!("{}", '\n'.escape_debug());
525    /// ```
526    ///
527    /// Both are equivalent to:
528    ///
529    /// ```
530    /// println!("\\n");
531    /// ```
532    ///
533    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
534    ///
535    /// ```
536    /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
537    /// ```
538    #[must_use = "this returns the escaped char as an iterator, \
539                  without modifying the original"]
540    #[stable(feature = "char_escape_debug", since = "1.20.0")]
541    #[inline]
542    #[cfg(not(feature = "ferrocene_certified"))]
    pub fn escape_debug(self) -> EscapeDebug {
        // Same escaping rules as `escape_debug_ext`, run with the `ESCAPE_ALL`
        // preset (presumably every optional escape enabled -- confirm against
        // the definition of `EscapeDebugExtArgs`).
        self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
    }
546
547    /// Returns an iterator that yields the literal escape code of a character
548    /// as `char`s.
549    ///
550    /// The default is chosen with a bias toward producing literals that are
551    /// legal in a variety of languages, including C++11 and similar C-family
552    /// languages. The exact rules are:
553    ///
554    /// * Tab is escaped as `\t`.
555    /// * Carriage return is escaped as `\r`.
556    /// * Line feed is escaped as `\n`.
557    /// * Single quote is escaped as `\'`.
558    /// * Double quote is escaped as `\"`.
559    /// * Backslash is escaped as `\\`.
560    /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
561    ///   inclusive is not escaped.
562    /// * All other characters are given hexadecimal Unicode escapes; see
563    ///   [`escape_unicode`].
564    ///
565    /// [`escape_unicode`]: #method.escape_unicode
566    ///
567    /// # Examples
568    ///
569    /// As an iterator:
570    ///
571    /// ```
572    /// for c in '"'.escape_default() {
573    ///     print!("{c}");
574    /// }
575    /// println!();
576    /// ```
577    ///
578    /// Using `println!` directly:
579    ///
580    /// ```
581    /// println!("{}", '"'.escape_default());
582    /// ```
583    ///
584    /// Both are equivalent to:
585    ///
586    /// ```
587    /// println!("\\\"");
588    /// ```
589    ///
590    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
591    ///
592    /// ```
593    /// assert_eq!('"'.escape_default().to_string(), "\\\"");
594    /// ```
595    #[must_use = "this returns the escaped char as an iterator, \
596                  without modifying the original"]
597    #[stable(feature = "rust1", since = "1.0.0")]
598    #[inline]
599    #[cfg(not(feature = "ferrocene_certified"))]
600    pub fn escape_default(self) -> EscapeDefault {
601        match self {
602            '\t' => EscapeDefault::backslash(ascii::Char::SmallT),
603            '\r' => EscapeDefault::backslash(ascii::Char::SmallR),
604            '\n' => EscapeDefault::backslash(ascii::Char::SmallN),
605            '\\' | '\'' | '\"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
606            '\x20'..='\x7e' => EscapeDefault::printable(self.as_ascii().unwrap()),
607            _ => EscapeDefault::unicode(self),
608        }
609    }
610
611    /// Returns the number of bytes this `char` would need if encoded in UTF-8.
612    ///
613    /// That number of bytes is always between 1 and 4, inclusive.
614    ///
615    /// # Examples
616    ///
617    /// Basic usage:
618    ///
619    /// ```
620    /// let len = 'A'.len_utf8();
621    /// assert_eq!(len, 1);
622    ///
623    /// let len = 'ß'.len_utf8();
624    /// assert_eq!(len, 2);
625    ///
626    /// let len = 'ℝ'.len_utf8();
627    /// assert_eq!(len, 3);
628    ///
629    /// let len = '💣'.len_utf8();
630    /// assert_eq!(len, 4);
631    /// ```
632    ///
633    /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
634    /// would take if each code point was represented as a `char` vs in the `&str` itself:
635    ///
636    /// ```
637    /// // as chars
638    /// let eastern = '東';
639    /// let capital = '京';
640    ///
641    /// // both can be represented as three bytes
642    /// assert_eq!(3, eastern.len_utf8());
643    /// assert_eq!(3, capital.len_utf8());
644    ///
645    /// // as a &str, these two are encoded in UTF-8
646    /// let tokyo = "東京";
647    ///
648    /// let len = eastern.len_utf8() + capital.len_utf8();
649    ///
650    /// // we can see that they take six bytes total...
651    /// assert_eq!(6, tokyo.len());
652    ///
653    /// // ... just like the &str
654    /// assert_eq!(len, tokyo.len());
655    /// ```
656    #[stable(feature = "rust1", since = "1.0.0")]
657    #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
658    #[inline]
659    #[must_use]
    pub const fn len_utf8(self) -> usize {
        // Not recursion: an unqualified call inside a method body never resolves
        // to an inherent method, so this is the free function `len_utf8`, which
        // is given the raw `u32` code point rather than the `char`.
        len_utf8(self as u32)
    }
663
664    /// Returns the number of 16-bit code units this `char` would need if
665    /// encoded in UTF-16.
666    ///
667    /// That number of code units is always either 1 or 2, for unicode scalar values in
668    /// the [basic multilingual plane] or [supplementary planes] respectively.
669    ///
670    /// See the documentation for [`len_utf8()`] for more explanation of this
671    /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
672    ///
673    /// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane
674    /// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes
675    /// [`len_utf8()`]: #method.len_utf8
676    ///
677    /// # Examples
678    ///
679    /// Basic usage:
680    ///
681    /// ```
682    /// let n = 'ß'.len_utf16();
683    /// assert_eq!(n, 1);
684    ///
685    /// let len = '💣'.len_utf16();
686    /// assert_eq!(len, 2);
687    /// ```
688    #[stable(feature = "rust1", since = "1.0.0")]
689    #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
690    #[inline]
691    #[must_use]
692    #[cfg(not(feature = "ferrocene_certified"))]
    pub const fn len_utf16(self) -> usize {
        // Not recursion: the unqualified name refers to the free function
        // `len_utf16`, which is given the raw `u32` code point rather than the
        // `char`.
        len_utf16(self as u32)
    }
696
697    /// Encodes this character as UTF-8 into the provided byte buffer,
698    /// and then returns the subslice of the buffer that contains the encoded character.
699    ///
700    /// # Panics
701    ///
702    /// Panics if the buffer is not large enough.
703    /// A buffer of length four is large enough to encode any `char`.
704    ///
705    /// # Examples
706    ///
707    /// In both of these examples, 'ß' takes two bytes to encode.
708    ///
709    /// ```
710    /// let mut b = [0; 2];
711    ///
712    /// let result = 'ß'.encode_utf8(&mut b);
713    ///
714    /// assert_eq!(result, "ß");
715    ///
716    /// assert_eq!(result.len(), 2);
717    /// ```
718    ///
719    /// A buffer that's too small:
720    ///
721    /// ```should_panic
722    /// let mut b = [0; 1];
723    ///
724    /// // this panics
725    /// 'ß'.encode_utf8(&mut b);
726    /// ```
727    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
728    #[rustc_const_stable(feature = "const_char_encode_utf8", since = "1.83.0")]
729    #[inline]
730    #[cfg(not(feature = "ferrocene_certified"))]
    pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
        // `encode_utf8_raw` is handed the raw code point and the caller's buffer;
        // the byte slice it returns is reinterpreted as `&mut str` without a
        // UTF-8 validation pass, which is what the `unsafe` block is for.
        // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
        unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
    }
735
736    /// Encodes this character as native endian UTF-16 into the provided `u16` buffer,
737    /// and then returns the subslice of the buffer that contains the encoded character.
738    ///
739    /// # Panics
740    ///
741    /// Panics if the buffer is not large enough.
742    /// A buffer of length 2 is large enough to encode any `char`.
743    ///
744    /// # Examples
745    ///
746    /// In both of these examples, '𝕊' takes two `u16`s to encode.
747    ///
748    /// ```
749    /// let mut b = [0; 2];
750    ///
751    /// let result = '𝕊'.encode_utf16(&mut b);
752    ///
753    /// assert_eq!(result.len(), 2);
754    /// ```
755    ///
756    /// A buffer that's too small:
757    ///
758    /// ```should_panic
759    /// let mut b = [0; 1];
760    ///
761    /// // this panics
762    /// '𝕊'.encode_utf16(&mut b);
763    /// ```
764    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
765    #[rustc_const_stable(feature = "const_char_encode_utf16", since = "1.84.0")]
766    #[inline]
767    #[cfg(not(feature = "ferrocene_certified"))]
    pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
        // All of the work (presumably including the panic for a buffer that is
        // too small) happens in `encode_utf16_raw`; this method only widens the
        // `char` to its `u32` code point first.
        encode_utf16_raw(self as u32, dst)
    }
771
772    /// Returns `true` if this `char` has the `Alphabetic` property.
773    ///
774    /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
775    /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
776    ///
777    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
778    /// [ucd]: https://www.unicode.org/reports/tr44/
779    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
780    ///
781    /// # Examples
782    ///
783    /// Basic usage:
784    ///
785    /// ```
786    /// assert!('a'.is_alphabetic());
787    /// assert!('京'.is_alphabetic());
788    ///
789    /// let c = '💝';
790    /// // love is many things, but it is not alphabetic
791    /// assert!(!c.is_alphabetic());
792    /// ```
793    #[must_use]
794    #[stable(feature = "rust1", since = "1.0.0")]
795    #[inline]
796    #[cfg(not(feature = "ferrocene_certified"))]
797    pub fn is_alphabetic(self) -> bool {
798        match self {
799            'a'..='z' | 'A'..='Z' => true,
800            c => c > '\x7f' && unicode::Alphabetic(c),
801        }
802    }
803
804    /// Returns `true` if this `char` has the `Lowercase` property.
805    ///
806    /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
807    /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
808    ///
809    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
810    /// [ucd]: https://www.unicode.org/reports/tr44/
811    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
812    ///
813    /// # Examples
814    ///
815    /// Basic usage:
816    ///
817    /// ```
818    /// assert!('a'.is_lowercase());
819    /// assert!('δ'.is_lowercase());
820    /// assert!(!'A'.is_lowercase());
821    /// assert!(!'Δ'.is_lowercase());
822    ///
823    /// // The various Chinese scripts and punctuation do not have case, and so:
824    /// assert!(!'中'.is_lowercase());
825    /// assert!(!' '.is_lowercase());
826    /// ```
827    ///
828    /// In a const context:
829    ///
830    /// ```
831    /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
832    /// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
833    /// ```
834    #[must_use]
835    #[stable(feature = "rust1", since = "1.0.0")]
836    #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
837    #[inline]
838    #[cfg(not(feature = "ferrocene_certified"))]
839    pub const fn is_lowercase(self) -> bool {
840        match self {
841            'a'..='z' => true,
842            c => c > '\x7f' && unicode::Lowercase(c),
843        }
844    }
845
846    /// Returns `true` if this `char` has the `Uppercase` property.
847    ///
848    /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
849    /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
850    ///
851    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
852    /// [ucd]: https://www.unicode.org/reports/tr44/
853    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
854    ///
855    /// # Examples
856    ///
857    /// Basic usage:
858    ///
859    /// ```
860    /// assert!(!'a'.is_uppercase());
861    /// assert!(!'δ'.is_uppercase());
862    /// assert!('A'.is_uppercase());
863    /// assert!('Δ'.is_uppercase());
864    ///
865    /// // The various Chinese scripts and punctuation do not have case, and so:
866    /// assert!(!'中'.is_uppercase());
867    /// assert!(!' '.is_uppercase());
868    /// ```
869    ///
870    /// In a const context:
871    ///
872    /// ```
873    /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
874    /// assert!(CAPITAL_DELTA_IS_UPPERCASE);
875    /// ```
876    #[must_use]
877    #[stable(feature = "rust1", since = "1.0.0")]
878    #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
879    #[inline]
880    #[cfg(not(feature = "ferrocene_certified"))]
881    pub const fn is_uppercase(self) -> bool {
882        match self {
883            'A'..='Z' => true,
884            c => c > '\x7f' && unicode::Uppercase(c),
885        }
886    }
887
888    /// Returns `true` if this `char` has the `White_Space` property.
889    ///
890    /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
891    ///
892    /// [ucd]: https://www.unicode.org/reports/tr44/
893    /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
894    ///
895    /// # Examples
896    ///
897    /// Basic usage:
898    ///
899    /// ```
900    /// assert!(' '.is_whitespace());
901    ///
902    /// // line break
903    /// assert!('\n'.is_whitespace());
904    ///
905    /// // a non-breaking space
906    /// assert!('\u{A0}'.is_whitespace());
907    ///
908    /// assert!(!'越'.is_whitespace());
909    /// ```
910    #[must_use]
911    #[stable(feature = "rust1", since = "1.0.0")]
912    #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
913    #[inline]
914    #[cfg(not(feature = "ferrocene_certified"))]
915    pub const fn is_whitespace(self) -> bool {
916        match self {
917            ' ' | '\x09'..='\x0d' => true,
918            c => c > '\x7f' && unicode::White_Space(c),
919        }
920    }
921
922    /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
923    ///
924    /// [`is_alphabetic()`]: #method.is_alphabetic
925    /// [`is_numeric()`]: #method.is_numeric
926    ///
927    /// # Examples
928    ///
929    /// Basic usage:
930    ///
931    /// ```
932    /// assert!('٣'.is_alphanumeric());
933    /// assert!('7'.is_alphanumeric());
934    /// assert!('৬'.is_alphanumeric());
935    /// assert!('¾'.is_alphanumeric());
936    /// assert!('①'.is_alphanumeric());
937    /// assert!('K'.is_alphanumeric());
938    /// assert!('و'.is_alphanumeric());
939    /// assert!('藏'.is_alphanumeric());
940    /// ```
941    #[must_use]
942    #[stable(feature = "rust1", since = "1.0.0")]
943    #[inline]
944    #[cfg(not(feature = "ferrocene_certified"))]
945    pub fn is_alphanumeric(self) -> bool {
946        if self.is_ascii() {
947            self.is_ascii_alphanumeric()
948        } else {
949            unicode::Alphabetic(self) || unicode::N(self)
950        }
951    }
952
953    /// Returns `true` if this `char` has the general category for control codes.
954    ///
955    /// Control codes (code points with the general category of `Cc`) are described in Chapter 4
956    /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
957    /// Database][ucd] [`UnicodeData.txt`].
958    ///
959    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
960    /// [ucd]: https://www.unicode.org/reports/tr44/
961    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
962    ///
963    /// # Examples
964    ///
965    /// Basic usage:
966    ///
967    /// ```
968    /// // U+009C, STRING TERMINATOR
969    /// assert!('œ'.is_control());
970    /// assert!(!'q'.is_control());
971    /// ```
972    #[must_use]
973    #[stable(feature = "rust1", since = "1.0.0")]
974    #[inline]
975    #[cfg(not(feature = "ferrocene_certified"))]
976    pub fn is_control(self) -> bool {
977        // According to
978        // https://www.unicode.org/policies/stability_policy.html#Property_Value,
979        // the set of codepoints in `Cc` will never change.
980        // So we can just hard-code the patterns to match against instead of using a table.
981        matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
982    }
983
984    /// Returns `true` if this `char` has the `Grapheme_Extend` property.
985    ///
986    /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
987    /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
988    /// [`DerivedCoreProperties.txt`].
989    ///
990    /// [uax29]: https://www.unicode.org/reports/tr29/
991    /// [ucd]: https://www.unicode.org/reports/tr44/
992    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
993    #[must_use]
994    #[inline]
995    #[cfg(not(feature = "ferrocene_certified"))]
996    pub(crate) fn is_grapheme_extended(self) -> bool {
997        !self.is_ascii() && unicode::Grapheme_Extend(self)
998    }
999
1000    /// Returns `true` if this `char` has the `Cased` property.
1001    ///
1002    /// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
1003    /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
1004    ///
1005    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1006    /// [ucd]: https://www.unicode.org/reports/tr44/
1007    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1008    #[must_use]
1009    #[inline]
1010    #[doc(hidden)]
1011    #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1012    #[cfg(not(feature = "ferrocene_certified"))]
1013    pub fn is_cased(self) -> bool {
1014        if self.is_ascii() { self.is_ascii_alphabetic() } else { unicode::Cased(self) }
1015    }
1016
1017    /// Returns `true` if this `char` has the `Case_Ignorable` property.
1018    ///
1019    /// `Case_Ignorable` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
1020    /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
1021    ///
1022    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1023    /// [ucd]: https://www.unicode.org/reports/tr44/
1024    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1025    #[must_use]
1026    #[inline]
1027    #[doc(hidden)]
1028    #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1029    #[cfg(not(feature = "ferrocene_certified"))]
1030    pub fn is_case_ignorable(self) -> bool {
1031        if self.is_ascii() {
1032            matches!(self, '\'' | '.' | ':' | '^' | '`')
1033        } else {
1034            unicode::Case_Ignorable(self)
1035        }
1036    }
1037
1038    /// Returns `true` if this `char` has one of the general categories for numbers.
1039    ///
1040    /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
1041    /// characters, and `No` for other numeric characters) are specified in the [Unicode Character
1042    /// Database][ucd] [`UnicodeData.txt`].
1043    ///
1044    /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'.
1045    /// If you want everything including characters with overlapping purposes then you might want to use
1046    /// a unicode or language-processing library that exposes the appropriate character properties instead
1047    /// of looking at the unicode categories.
1048    ///
1049    /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
1050    /// `is_ascii_digit` or `is_digit` instead.
1051    ///
1052    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1053    /// [ucd]: https://www.unicode.org/reports/tr44/
1054    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1055    ///
1056    /// # Examples
1057    ///
1058    /// Basic usage:
1059    ///
1060    /// ```
1061    /// assert!('٣'.is_numeric());
1062    /// assert!('7'.is_numeric());
1063    /// assert!('৬'.is_numeric());
1064    /// assert!('¾'.is_numeric());
1065    /// assert!('①'.is_numeric());
1066    /// assert!(!'K'.is_numeric());
1067    /// assert!(!'و'.is_numeric());
1068    /// assert!(!'藏'.is_numeric());
1069    /// assert!(!'三'.is_numeric());
1070    /// ```
1071    #[must_use]
1072    #[stable(feature = "rust1", since = "1.0.0")]
1073    #[inline]
1074    #[cfg(not(feature = "ferrocene_certified"))]
1075    pub fn is_numeric(self) -> bool {
1076        match self {
1077            '0'..='9' => true,
1078            c => c > '\x7f' && unicode::N(c),
1079        }
1080    }
1081
1082    /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
1083    /// `char`s.
1084    ///
1085    /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
1086    ///
1087    /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
1088    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1089    ///
1090    /// [ucd]: https://www.unicode.org/reports/tr44/
1091    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1092    ///
1093    /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
1094    /// the `char`(s) given by [`SpecialCasing.txt`].
1095    ///
1096    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1097    ///
1098    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1099    /// is independent of context and language.
1100    ///
1101    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1102    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1103    ///
1104    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1105    ///
1106    /// # Examples
1107    ///
1108    /// As an iterator:
1109    ///
1110    /// ```
1111    /// for c in 'İ'.to_lowercase() {
1112    ///     print!("{c}");
1113    /// }
1114    /// println!();
1115    /// ```
1116    ///
1117    /// Using `println!` directly:
1118    ///
1119    /// ```
1120    /// println!("{}", 'İ'.to_lowercase());
1121    /// ```
1122    ///
1123    /// Both are equivalent to:
1124    ///
1125    /// ```
1126    /// println!("i\u{307}");
1127    /// ```
1128    ///
1129    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1130    ///
1131    /// ```
1132    /// assert_eq!('C'.to_lowercase().to_string(), "c");
1133    ///
1134    /// // Sometimes the result is more than one character:
1135    /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
1136    ///
1137    /// // Characters that do not have both uppercase and lowercase
1138    /// // convert into themselves.
1139    /// assert_eq!('山'.to_lowercase().to_string(), "山");
1140    /// ```
1141    #[must_use = "this returns the lowercase character as a new iterator, \
1142                  without modifying the original"]
1143    #[stable(feature = "rust1", since = "1.0.0")]
1144    #[inline]
1145    #[cfg(not(feature = "ferrocene_certified"))]
1146    pub fn to_lowercase(self) -> ToLowercase {
1147        ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
1148    }
1149
1150    /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
1151    /// `char`s.
1152    ///
1153    /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
1154    ///
1155    /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
1156    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1157    ///
1158    /// [ucd]: https://www.unicode.org/reports/tr44/
1159    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1160    ///
1161    /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
1162    /// the `char`(s) given by [`SpecialCasing.txt`].
1163    ///
1164    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1165    ///
1166    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1167    /// is independent of context and language.
1168    ///
1169    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1170    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1171    ///
1172    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1173    ///
1174    /// # Examples
1175    ///
1176    /// As an iterator:
1177    ///
1178    /// ```
1179    /// for c in 'ß'.to_uppercase() {
1180    ///     print!("{c}");
1181    /// }
1182    /// println!();
1183    /// ```
1184    ///
1185    /// Using `println!` directly:
1186    ///
1187    /// ```
1188    /// println!("{}", 'ß'.to_uppercase());
1189    /// ```
1190    ///
1191    /// Both are equivalent to:
1192    ///
1193    /// ```
1194    /// println!("SS");
1195    /// ```
1196    ///
1197    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1198    ///
1199    /// ```
1200    /// assert_eq!('c'.to_uppercase().to_string(), "C");
1201    ///
1202    /// // Sometimes the result is more than one character:
1203    /// assert_eq!('ß'.to_uppercase().to_string(), "SS");
1204    ///
1205    /// // Characters that do not have both uppercase and lowercase
1206    /// // convert into themselves.
1207    /// assert_eq!('山'.to_uppercase().to_string(), "山");
1208    /// ```
1209    ///
1210    /// # Note on locale
1211    ///
1212    /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
1213    ///
1214    /// * 'Dotless': I / ı, sometimes written ï
1215    /// * 'Dotted': İ / i
1216    ///
1217    /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1218    ///
1219    /// ```
1220    /// let upper_i = 'i'.to_uppercase().to_string();
1221    /// ```
1222    ///
1223    /// The value of `upper_i` here relies on the language of the text: if we're
1224    /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
1225    /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
1226    ///
1227    /// ```
1228    /// let upper_i = 'i'.to_uppercase().to_string();
1229    ///
1230    /// assert_eq!(upper_i, "I");
1231    /// ```
1232    ///
1233    /// holds across languages.
1234    #[must_use = "this returns the uppercase character as a new iterator, \
1235                  without modifying the original"]
1236    #[stable(feature = "rust1", since = "1.0.0")]
1237    #[inline]
1238    #[cfg(not(feature = "ferrocene_certified"))]
1239    pub fn to_uppercase(self) -> ToUppercase {
1240        ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
1241    }
1242
1243    /// Checks if the value is within the ASCII range.
1244    ///
1245    /// # Examples
1246    ///
1247    /// ```
1248    /// let ascii = 'a';
1249    /// let non_ascii = '❤';
1250    ///
1251    /// assert!(ascii.is_ascii());
1252    /// assert!(!non_ascii.is_ascii());
1253    /// ```
1254    #[must_use]
1255    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1256    #[rustc_const_stable(feature = "const_char_is_ascii", since = "1.32.0")]
1257    #[rustc_diagnostic_item = "char_is_ascii"]
1258    #[inline]
1259    #[cfg(not(feature = "ferrocene_certified"))]
1260    pub const fn is_ascii(&self) -> bool {
1261        *self as u32 <= 0x7F
1262    }
1263
1264    /// Returns `Some` if the value is within the ASCII range,
1265    /// or `None` if it's not.
1266    ///
1267    /// This is preferred to [`Self::is_ascii`] when you're passing the value
1268    /// along to something else that can take [`ascii::Char`] rather than
1269    /// needing to check again for itself whether the value is in ASCII.
1270    #[must_use]
1271    #[unstable(feature = "ascii_char", issue = "110998")]
1272    #[inline]
1273    #[cfg(not(feature = "ferrocene_certified"))]
1274    pub const fn as_ascii(&self) -> Option<ascii::Char> {
1275        if self.is_ascii() {
1276            // SAFETY: Just checked that this is ASCII.
1277            Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
1278        } else {
1279            None
1280        }
1281    }
1282
1283    /// Converts this char into an [ASCII character](`ascii::Char`), without
1284    /// checking whether it is valid.
1285    ///
1286    /// # Safety
1287    ///
1288    /// This char must be within the ASCII range, or else this is UB.
1289    #[must_use]
1290    #[unstable(feature = "ascii_char", issue = "110998")]
1291    #[inline]
1292    #[cfg(not(feature = "ferrocene_certified"))]
1293    pub const unsafe fn as_ascii_unchecked(&self) -> ascii::Char {
1294        assert_unsafe_precondition!(
1295            check_library_ub,
1296            "as_ascii_unchecked requires that the char is valid ASCII",
1297            (it: &char = self) => it.is_ascii()
1298        );
1299
1300        // SAFETY: the caller promised that this char is ASCII.
1301        unsafe { ascii::Char::from_u8_unchecked(*self as u8) }
1302    }
1303
1304    /// Makes a copy of the value in its ASCII upper case equivalent.
1305    ///
1306    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1307    /// but non-ASCII letters are unchanged.
1308    ///
1309    /// To uppercase the value in-place, use [`make_ascii_uppercase()`].
1310    ///
1311    /// To uppercase ASCII characters in addition to non-ASCII characters, use
1312    /// [`to_uppercase()`].
1313    ///
1314    /// # Examples
1315    ///
1316    /// ```
1317    /// let ascii = 'a';
1318    /// let non_ascii = '❤';
1319    ///
1320    /// assert_eq!('A', ascii.to_ascii_uppercase());
1321    /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
1322    /// ```
1323    ///
1324    /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase
1325    /// [`to_uppercase()`]: #method.to_uppercase
1326    #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1327    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1328    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1329    #[inline]
1330    #[cfg(not(feature = "ferrocene_certified"))]
1331    pub const fn to_ascii_uppercase(&self) -> char {
1332        if self.is_ascii_lowercase() {
1333            (*self as u8).ascii_change_case_unchecked() as char
1334        } else {
1335            *self
1336        }
1337    }
1338
1339    /// Makes a copy of the value in its ASCII lower case equivalent.
1340    ///
1341    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1342    /// but non-ASCII letters are unchanged.
1343    ///
1344    /// To lowercase the value in-place, use [`make_ascii_lowercase()`].
1345    ///
1346    /// To lowercase ASCII characters in addition to non-ASCII characters, use
1347    /// [`to_lowercase()`].
1348    ///
1349    /// # Examples
1350    ///
1351    /// ```
1352    /// let ascii = 'A';
1353    /// let non_ascii = '❤';
1354    ///
1355    /// assert_eq!('a', ascii.to_ascii_lowercase());
1356    /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
1357    /// ```
1358    ///
1359    /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase
1360    /// [`to_lowercase()`]: #method.to_lowercase
1361    #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1362    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1363    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1364    #[inline]
1365    #[cfg(not(feature = "ferrocene_certified"))]
1366    pub const fn to_ascii_lowercase(&self) -> char {
1367        if self.is_ascii_uppercase() {
1368            (*self as u8).ascii_change_case_unchecked() as char
1369        } else {
1370            *self
1371        }
1372    }
1373
1374    /// Checks that two values are an ASCII case-insensitive match.
1375    ///
1376    /// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>.
1377    ///
1378    /// # Examples
1379    ///
1380    /// ```
1381    /// let upper_a = 'A';
1382    /// let lower_a = 'a';
1383    /// let lower_z = 'z';
1384    ///
1385    /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1386    /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1387    /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1388    /// ```
1389    ///
1390    /// [to_ascii_lowercase]: #method.to_ascii_lowercase
1391    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1392    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1393    #[inline]
1394    #[cfg(not(feature = "ferrocene_certified"))]
1395    pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool {
1396        self.to_ascii_lowercase() == other.to_ascii_lowercase()
1397    }
1398
1399    /// Converts this type to its ASCII upper case equivalent in-place.
1400    ///
1401    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1402    /// but non-ASCII letters are unchanged.
1403    ///
1404    /// To return a new uppercased value without modifying the existing one, use
1405    /// [`to_ascii_uppercase()`].
1406    ///
1407    /// # Examples
1408    ///
1409    /// ```
1410    /// let mut ascii = 'a';
1411    ///
1412    /// ascii.make_ascii_uppercase();
1413    ///
1414    /// assert_eq!('A', ascii);
1415    /// ```
1416    ///
1417    /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
1418    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1419    #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
1420    #[inline]
1421    #[cfg(not(feature = "ferrocene_certified"))]
1422    pub const fn make_ascii_uppercase(&mut self) {
1423        *self = self.to_ascii_uppercase();
1424    }
1425
1426    /// Converts this type to its ASCII lower case equivalent in-place.
1427    ///
1428    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1429    /// but non-ASCII letters are unchanged.
1430    ///
1431    /// To return a new lowercased value without modifying the existing one, use
1432    /// [`to_ascii_lowercase()`].
1433    ///
1434    /// # Examples
1435    ///
1436    /// ```
1437    /// let mut ascii = 'A';
1438    ///
1439    /// ascii.make_ascii_lowercase();
1440    ///
1441    /// assert_eq!('a', ascii);
1442    /// ```
1443    ///
1444    /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
1445    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1446    #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
1447    #[inline]
1448    #[cfg(not(feature = "ferrocene_certified"))]
1449    pub const fn make_ascii_lowercase(&mut self) {
1450        *self = self.to_ascii_lowercase();
1451    }
1452
1453    /// Checks if the value is an ASCII alphabetic character:
1454    ///
1455    /// - U+0041 'A' ..= U+005A 'Z', or
1456    /// - U+0061 'a' ..= U+007A 'z'.
1457    ///
1458    /// # Examples
1459    ///
1460    /// ```
1461    /// let uppercase_a = 'A';
1462    /// let uppercase_g = 'G';
1463    /// let a = 'a';
1464    /// let g = 'g';
1465    /// let zero = '0';
1466    /// let percent = '%';
1467    /// let space = ' ';
1468    /// let lf = '\n';
1469    /// let esc = '\x1b';
1470    ///
1471    /// assert!(uppercase_a.is_ascii_alphabetic());
1472    /// assert!(uppercase_g.is_ascii_alphabetic());
1473    /// assert!(a.is_ascii_alphabetic());
1474    /// assert!(g.is_ascii_alphabetic());
1475    /// assert!(!zero.is_ascii_alphabetic());
1476    /// assert!(!percent.is_ascii_alphabetic());
1477    /// assert!(!space.is_ascii_alphabetic());
1478    /// assert!(!lf.is_ascii_alphabetic());
1479    /// assert!(!esc.is_ascii_alphabetic());
1480    /// ```
1481    #[must_use]
1482    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1483    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1484    #[inline]
1485    #[cfg(not(feature = "ferrocene_certified"))]
1486    pub const fn is_ascii_alphabetic(&self) -> bool {
1487        matches!(*self, 'A'..='Z' | 'a'..='z')
1488    }
1489
1490    /// Checks if the value is an ASCII uppercase character:
1491    /// U+0041 'A' ..= U+005A 'Z'.
1492    ///
1493    /// # Examples
1494    ///
1495    /// ```
1496    /// let uppercase_a = 'A';
1497    /// let uppercase_g = 'G';
1498    /// let a = 'a';
1499    /// let g = 'g';
1500    /// let zero = '0';
1501    /// let percent = '%';
1502    /// let space = ' ';
1503    /// let lf = '\n';
1504    /// let esc = '\x1b';
1505    ///
1506    /// assert!(uppercase_a.is_ascii_uppercase());
1507    /// assert!(uppercase_g.is_ascii_uppercase());
1508    /// assert!(!a.is_ascii_uppercase());
1509    /// assert!(!g.is_ascii_uppercase());
1510    /// assert!(!zero.is_ascii_uppercase());
1511    /// assert!(!percent.is_ascii_uppercase());
1512    /// assert!(!space.is_ascii_uppercase());
1513    /// assert!(!lf.is_ascii_uppercase());
1514    /// assert!(!esc.is_ascii_uppercase());
1515    /// ```
1516    #[must_use]
1517    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1518    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1519    #[inline]
1520    #[cfg(not(feature = "ferrocene_certified"))]
1521    pub const fn is_ascii_uppercase(&self) -> bool {
1522        matches!(*self, 'A'..='Z')
1523    }
1524
1525    /// Checks if the value is an ASCII lowercase character:
1526    /// U+0061 'a' ..= U+007A 'z'.
1527    ///
1528    /// # Examples
1529    ///
1530    /// ```
1531    /// let uppercase_a = 'A';
1532    /// let uppercase_g = 'G';
1533    /// let a = 'a';
1534    /// let g = 'g';
1535    /// let zero = '0';
1536    /// let percent = '%';
1537    /// let space = ' ';
1538    /// let lf = '\n';
1539    /// let esc = '\x1b';
1540    ///
1541    /// assert!(!uppercase_a.is_ascii_lowercase());
1542    /// assert!(!uppercase_g.is_ascii_lowercase());
1543    /// assert!(a.is_ascii_lowercase());
1544    /// assert!(g.is_ascii_lowercase());
1545    /// assert!(!zero.is_ascii_lowercase());
1546    /// assert!(!percent.is_ascii_lowercase());
1547    /// assert!(!space.is_ascii_lowercase());
1548    /// assert!(!lf.is_ascii_lowercase());
1549    /// assert!(!esc.is_ascii_lowercase());
1550    /// ```
1551    #[must_use]
1552    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1553    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1554    #[inline]
1555    #[cfg(not(feature = "ferrocene_certified"))]
1556    pub const fn is_ascii_lowercase(&self) -> bool {
1557        matches!(*self, 'a'..='z')
1558    }
1559
1560    /// Checks if the value is an ASCII alphanumeric character:
1561    ///
1562    /// - U+0041 'A' ..= U+005A 'Z', or
1563    /// - U+0061 'a' ..= U+007A 'z', or
1564    /// - U+0030 '0' ..= U+0039 '9'.
1565    ///
1566    /// # Examples
1567    ///
1568    /// ```
1569    /// let uppercase_a = 'A';
1570    /// let uppercase_g = 'G';
1571    /// let a = 'a';
1572    /// let g = 'g';
1573    /// let zero = '0';
1574    /// let percent = '%';
1575    /// let space = ' ';
1576    /// let lf = '\n';
1577    /// let esc = '\x1b';
1578    ///
1579    /// assert!(uppercase_a.is_ascii_alphanumeric());
1580    /// assert!(uppercase_g.is_ascii_alphanumeric());
1581    /// assert!(a.is_ascii_alphanumeric());
1582    /// assert!(g.is_ascii_alphanumeric());
1583    /// assert!(zero.is_ascii_alphanumeric());
1584    /// assert!(!percent.is_ascii_alphanumeric());
1585    /// assert!(!space.is_ascii_alphanumeric());
1586    /// assert!(!lf.is_ascii_alphanumeric());
1587    /// assert!(!esc.is_ascii_alphanumeric());
1588    /// ```
1589    #[must_use]
1590    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1591    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1592    #[inline]
1593    #[cfg(not(feature = "ferrocene_certified"))]
1594    pub const fn is_ascii_alphanumeric(&self) -> bool {
1595        matches!(*self, '0'..='9') | matches!(*self, 'A'..='Z') | matches!(*self, 'a'..='z')
1596    }
1597
1598    /// Checks if the value is an ASCII decimal digit:
1599    /// U+0030 '0' ..= U+0039 '9'.
1600    ///
1601    /// # Examples
1602    ///
1603    /// ```
1604    /// let uppercase_a = 'A';
1605    /// let uppercase_g = 'G';
1606    /// let a = 'a';
1607    /// let g = 'g';
1608    /// let zero = '0';
1609    /// let percent = '%';
1610    /// let space = ' ';
1611    /// let lf = '\n';
1612    /// let esc = '\x1b';
1613    ///
1614    /// assert!(!uppercase_a.is_ascii_digit());
1615    /// assert!(!uppercase_g.is_ascii_digit());
1616    /// assert!(!a.is_ascii_digit());
1617    /// assert!(!g.is_ascii_digit());
1618    /// assert!(zero.is_ascii_digit());
1619    /// assert!(!percent.is_ascii_digit());
1620    /// assert!(!space.is_ascii_digit());
1621    /// assert!(!lf.is_ascii_digit());
1622    /// assert!(!esc.is_ascii_digit());
1623    /// ```
1624    #[must_use]
1625    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1626    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1627    #[inline]
1628    #[cfg(not(feature = "ferrocene_certified"))]
1629    pub const fn is_ascii_digit(&self) -> bool {
1630        matches!(*self, '0'..='9')
1631    }
1632
1633    /// Checks if the value is an ASCII octal digit:
1634    /// U+0030 '0' ..= U+0037 '7'.
1635    ///
1636    /// # Examples
1637    ///
1638    /// ```
1639    /// #![feature(is_ascii_octdigit)]
1640    ///
1641    /// let uppercase_a = 'A';
1642    /// let a = 'a';
1643    /// let zero = '0';
1644    /// let seven = '7';
1645    /// let nine = '9';
1646    /// let percent = '%';
1647    /// let lf = '\n';
1648    ///
1649    /// assert!(!uppercase_a.is_ascii_octdigit());
1650    /// assert!(!a.is_ascii_octdigit());
1651    /// assert!(zero.is_ascii_octdigit());
1652    /// assert!(seven.is_ascii_octdigit());
1653    /// assert!(!nine.is_ascii_octdigit());
1654    /// assert!(!percent.is_ascii_octdigit());
1655    /// assert!(!lf.is_ascii_octdigit());
1656    /// ```
1657    #[must_use]
1658    #[unstable(feature = "is_ascii_octdigit", issue = "101288")]
1659    #[inline]
1660    #[cfg(not(feature = "ferrocene_certified"))]
1661    pub const fn is_ascii_octdigit(&self) -> bool {
1662        matches!(*self, '0'..='7')
1663    }
1664
1665    /// Checks if the value is an ASCII hexadecimal digit:
1666    ///
1667    /// - U+0030 '0' ..= U+0039 '9', or
1668    /// - U+0041 'A' ..= U+0046 'F', or
1669    /// - U+0061 'a' ..= U+0066 'f'.
1670    ///
1671    /// # Examples
1672    ///
1673    /// ```
1674    /// let uppercase_a = 'A';
1675    /// let uppercase_g = 'G';
1676    /// let a = 'a';
1677    /// let g = 'g';
1678    /// let zero = '0';
1679    /// let percent = '%';
1680    /// let space = ' ';
1681    /// let lf = '\n';
1682    /// let esc = '\x1b';
1683    ///
1684    /// assert!(uppercase_a.is_ascii_hexdigit());
1685    /// assert!(!uppercase_g.is_ascii_hexdigit());
1686    /// assert!(a.is_ascii_hexdigit());
1687    /// assert!(!g.is_ascii_hexdigit());
1688    /// assert!(zero.is_ascii_hexdigit());
1689    /// assert!(!percent.is_ascii_hexdigit());
1690    /// assert!(!space.is_ascii_hexdigit());
1691    /// assert!(!lf.is_ascii_hexdigit());
1692    /// assert!(!esc.is_ascii_hexdigit());
1693    /// ```
1694    #[must_use]
1695    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1696    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1697    #[inline]
1698    #[cfg(not(feature = "ferrocene_certified"))]
1699    pub const fn is_ascii_hexdigit(&self) -> bool {
1700        matches!(*self, '0'..='9') | matches!(*self, 'A'..='F') | matches!(*self, 'a'..='f')
1701    }
1702
1703    /// Checks if the value is an ASCII punctuation character:
1704    ///
1705    /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or
1706    /// - U+003A ..= U+0040 `: ; < = > ? @`, or
1707    /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or
1708    /// - U+007B ..= U+007E `{ | } ~`
1709    ///
1710    /// # Examples
1711    ///
1712    /// ```
1713    /// let uppercase_a = 'A';
1714    /// let uppercase_g = 'G';
1715    /// let a = 'a';
1716    /// let g = 'g';
1717    /// let zero = '0';
1718    /// let percent = '%';
1719    /// let space = ' ';
1720    /// let lf = '\n';
1721    /// let esc = '\x1b';
1722    ///
1723    /// assert!(!uppercase_a.is_ascii_punctuation());
1724    /// assert!(!uppercase_g.is_ascii_punctuation());
1725    /// assert!(!a.is_ascii_punctuation());
1726    /// assert!(!g.is_ascii_punctuation());
1727    /// assert!(!zero.is_ascii_punctuation());
1728    /// assert!(percent.is_ascii_punctuation());
1729    /// assert!(!space.is_ascii_punctuation());
1730    /// assert!(!lf.is_ascii_punctuation());
1731    /// assert!(!esc.is_ascii_punctuation());
1732    /// ```
1733    #[must_use]
1734    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1735    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1736    #[inline]
1737    #[cfg(not(feature = "ferrocene_certified"))]
1738    pub const fn is_ascii_punctuation(&self) -> bool {
1739        matches!(*self, '!'..='/')
1740            | matches!(*self, ':'..='@')
1741            | matches!(*self, '['..='`')
1742            | matches!(*self, '{'..='~')
1743    }
1744
1745    /// Checks if the value is an ASCII graphic character:
1746    /// U+0021 '!' ..= U+007E '~'.
1747    ///
1748    /// # Examples
1749    ///
1750    /// ```
1751    /// let uppercase_a = 'A';
1752    /// let uppercase_g = 'G';
1753    /// let a = 'a';
1754    /// let g = 'g';
1755    /// let zero = '0';
1756    /// let percent = '%';
1757    /// let space = ' ';
1758    /// let lf = '\n';
1759    /// let esc = '\x1b';
1760    ///
1761    /// assert!(uppercase_a.is_ascii_graphic());
1762    /// assert!(uppercase_g.is_ascii_graphic());
1763    /// assert!(a.is_ascii_graphic());
1764    /// assert!(g.is_ascii_graphic());
1765    /// assert!(zero.is_ascii_graphic());
1766    /// assert!(percent.is_ascii_graphic());
1767    /// assert!(!space.is_ascii_graphic());
1768    /// assert!(!lf.is_ascii_graphic());
1769    /// assert!(!esc.is_ascii_graphic());
1770    /// ```
1771    #[must_use]
1772    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1773    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1774    #[inline]
1775    #[cfg(not(feature = "ferrocene_certified"))]
1776    pub const fn is_ascii_graphic(&self) -> bool {
1777        matches!(*self, '!'..='~')
1778    }
1779
1780    /// Checks if the value is an ASCII whitespace character:
1781    /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1782    /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
1783    ///
1784    /// Rust uses the WhatWG Infra Standard's [definition of ASCII
1785    /// whitespace][infra-aw]. There are several other definitions in
1786    /// wide use. For instance, [the POSIX locale][pct] includes
1787    /// U+000B VERTICAL TAB as well as all the above characters,
1788    /// but—from the very same specification—[the default rule for
1789    /// "field splitting" in the Bourne shell][bfs] considers *only*
1790    /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1791    ///
1792    /// If you are writing a program that will process an existing
1793    /// file format, check what that format's definition of whitespace is
1794    /// before using this function.
1795    ///
1796    /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1797    /// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1798    /// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1799    ///
1800    /// # Examples
1801    ///
1802    /// ```
1803    /// let uppercase_a = 'A';
1804    /// let uppercase_g = 'G';
1805    /// let a = 'a';
1806    /// let g = 'g';
1807    /// let zero = '0';
1808    /// let percent = '%';
1809    /// let space = ' ';
1810    /// let lf = '\n';
1811    /// let esc = '\x1b';
1812    ///
1813    /// assert!(!uppercase_a.is_ascii_whitespace());
1814    /// assert!(!uppercase_g.is_ascii_whitespace());
1815    /// assert!(!a.is_ascii_whitespace());
1816    /// assert!(!g.is_ascii_whitespace());
1817    /// assert!(!zero.is_ascii_whitespace());
1818    /// assert!(!percent.is_ascii_whitespace());
1819    /// assert!(space.is_ascii_whitespace());
1820    /// assert!(lf.is_ascii_whitespace());
1821    /// assert!(!esc.is_ascii_whitespace());
1822    /// ```
1823    #[must_use]
1824    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1825    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1826    #[inline]
1827    #[cfg(not(feature = "ferrocene_certified"))]
1828    pub const fn is_ascii_whitespace(&self) -> bool {
1829        matches!(*self, '\t' | '\n' | '\x0C' | '\r' | ' ')
1830    }
1831
1832    /// Checks if the value is an ASCII control character:
1833    /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE.
1834    /// Note that most ASCII whitespace characters are control
1835    /// characters, but SPACE is not.
1836    ///
1837    /// # Examples
1838    ///
1839    /// ```
1840    /// let uppercase_a = 'A';
1841    /// let uppercase_g = 'G';
1842    /// let a = 'a';
1843    /// let g = 'g';
1844    /// let zero = '0';
1845    /// let percent = '%';
1846    /// let space = ' ';
1847    /// let lf = '\n';
1848    /// let esc = '\x1b';
1849    ///
1850    /// assert!(!uppercase_a.is_ascii_control());
1851    /// assert!(!uppercase_g.is_ascii_control());
1852    /// assert!(!a.is_ascii_control());
1853    /// assert!(!g.is_ascii_control());
1854    /// assert!(!zero.is_ascii_control());
1855    /// assert!(!percent.is_ascii_control());
1856    /// assert!(!space.is_ascii_control());
1857    /// assert!(lf.is_ascii_control());
1858    /// assert!(esc.is_ascii_control());
1859    /// ```
1860    #[must_use]
1861    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1862    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1863    #[inline]
1864    #[cfg(not(feature = "ferrocene_certified"))]
1865    pub const fn is_ascii_control(&self) -> bool {
1866        matches!(*self, '\0'..='\x1F' | '\x7F')
1867    }
1868}
1869
/// Per-category switches selecting which characters get escaped.
#[cfg(not(feature = "ferrocene_certified"))]
pub(crate) struct EscapeDebugExtArgs {
    /// Whether Extended Grapheme code points should be escaped.
    pub(crate) escape_grapheme_extended: bool,

    /// Whether single quotes should be escaped.
    pub(crate) escape_single_quote: bool,

    /// Whether double quotes should be escaped.
    pub(crate) escape_double_quote: bool,
}
1881
1882#[cfg(not(feature = "ferrocene_certified"))]
1883impl EscapeDebugExtArgs {
1884    pub(crate) const ESCAPE_ALL: Self = Self {
1885        escape_grapheme_extended: true,
1886        escape_single_quote: true,
1887        escape_double_quote: true,
1888    };
1889}
1890
1891#[inline]
1892#[must_use]
1893const fn len_utf8(code: u32) -> usize {
1894    match code {
1895        ..MAX_ONE_B => 1,
1896        ..MAX_TWO_B => 2,
1897        ..MAX_THREE_B => 3,
1898        _ => 4,
1899    }
1900}
1901
/// Returns how many `u16` units (1 or 2) the UTF-16 encoding of `code` occupies.
///
/// Values that fit in 16 bits need a single unit; everything larger needs two.
#[inline]
#[must_use]
#[cfg(not(feature = "ferrocene_certified"))]
const fn len_utf16(code: u32) -> usize {
    match code {
        0..=0xFFFF => 1,
        _ => 2,
    }
}
1908
1909/// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,
1910/// and then returns the subslice of the buffer that contains the encoded character.
1911///
1912/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
1913/// (Creating a `char` in the surrogate range is UB.)
1914/// The result is valid [generalized UTF-8] but not valid UTF-8.
1915///
1916/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
1917///
1918/// # Panics
1919///
1920/// Panics if the buffer is not large enough.
1921/// A buffer of length four is large enough to encode any `char`.
1922#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1923#[doc(hidden)]
1924#[inline]
1925#[cfg(not(feature = "ferrocene_certified"))]
1926pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
1927    let len = len_utf8(code);
1928    if dst.len() < len {
1929        const_panic!(
1930            "encode_utf8: buffer does not have enough bytes to encode code point",
1931            "encode_utf8: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
1932            code: u32 = code,
1933            len: usize = len,
1934            dst_len: usize = dst.len(),
1935        );
1936    }
1937
1938    // SAFETY: `dst` is checked to be at least the length needed to encode the codepoint.
1939    unsafe { encode_utf8_raw_unchecked(code, dst.as_mut_ptr()) };
1940
1941    // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
1942    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
1943}
1944
1945/// Encodes a raw `u32` value as UTF-8 into the byte buffer pointed to by `dst`.
1946///
1947/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
1948/// (Creating a `char` in the surrogate range is UB.)
1949/// The result is valid [generalized UTF-8] but not valid UTF-8.
1950///
1951/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
1952///
1953/// # Safety
1954///
1955/// The behavior is undefined if the buffer pointed to by `dst` is not
1956/// large enough to hold the encoded codepoint. A buffer of length four
1957/// is large enough to encode any `char`.
1958///
1959/// For a safe version of this function, see the [`encode_utf8_raw`] function.
1960#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1961#[doc(hidden)]
1962#[inline]
1963#[cfg(not(feature = "ferrocene_certified"))]
1964pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) {
1965    let len = len_utf8(code);
1966    // SAFETY: The caller must guarantee that the buffer pointed to by `dst`
1967    // is at least `len` bytes long.
1968    unsafe {
1969        if len == 1 {
1970            *dst = code as u8;
1971            return;
1972        }
1973
1974        let last1 = (code >> 0 & 0x3F) as u8 | TAG_CONT;
1975        let last2 = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1976        let last3 = (code >> 12 & 0x3F) as u8 | TAG_CONT;
1977        let last4 = (code >> 18 & 0x3F) as u8 | TAG_FOUR_B;
1978
1979        if len == 2 {
1980            *dst = last2 | TAG_TWO_B;
1981            *dst.add(1) = last1;
1982            return;
1983        }
1984
1985        if len == 3 {
1986            *dst = last3 | TAG_THREE_B;
1987            *dst.add(1) = last2;
1988            *dst.add(2) = last1;
1989            return;
1990        }
1991
1992        *dst = last4;
1993        *dst.add(1) = last3;
1994        *dst.add(2) = last2;
1995        *dst.add(3) = last1;
1996    }
1997}
1998
1999/// Encodes a raw `u32` value as native endian UTF-16 into the provided `u16` buffer,
2000/// and then returns the subslice of the buffer that contains the encoded character.
2001///
2002/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
2003/// (Creating a `char` in the surrogate range is UB.)
2004///
2005/// # Panics
2006///
2007/// Panics if the buffer is not large enough.
2008/// A buffer of length 2 is large enough to encode any `char`.
2009#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2010#[doc(hidden)]
2011#[inline]
2012#[cfg(not(feature = "ferrocene_certified"))]
2013pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
2014    let len = len_utf16(code);
2015    match (len, &mut *dst) {
2016        (1, [a, ..]) => {
2017            *a = code as u16;
2018        }
2019        (2, [a, b, ..]) => {
2020            code -= 0x1_0000;
2021            *a = (code >> 10) as u16 | 0xD800;
2022            *b = (code & 0x3FF) as u16 | 0xDC00;
2023        }
2024        _ => {
2025            const_panic!(
2026                "encode_utf16: buffer does not have enough bytes to encode code point",
2027                "encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
2028                code: u32 = code,
2029                len: usize = len,
2030                dst_len: usize = dst.len(),
2031            )
2032        }
2033    };
2034    // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
2035    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
2036}