Skip to main content

core/char/
methods.rs

1//! impl char {}
2
3use super::*;
4use crate::panic::const_panic;
5use crate::slice;
6use crate::str::from_utf8_unchecked_mut;
7use crate::ub_checks::assert_unsafe_precondition;
8use crate::unicode::{self, conversions};
9
10impl char {
11    /// The lowest valid code point a `char` can have, `'\0'`.
12    ///
13    /// Unlike integer types, `char` actually has a gap in the middle,
14    /// meaning that the range of possible `char`s is smaller than you
15    /// might expect. Ranges of `char` will automatically hop this gap
16    /// for you:
17    ///
18    /// ```
19    /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
20    /// let size = (char::MIN..=char::MAX).count() as u32;
21    /// assert!(size < dist);
22    /// ```
23    ///
24    /// Despite this gap, the `MIN` and [`MAX`] values can be used as bounds for
25    /// all `char` values.
26    ///
27    /// [`MAX`]: char::MAX
28    ///
29    /// # Examples
30    ///
31    /// ```
32    /// # fn something_which_returns_char() -> char { 'a' }
33    /// let c: char = something_which_returns_char();
34    /// assert!(char::MIN <= c);
35    ///
36    /// let value_at_min = u32::from(char::MIN);
37    /// assert_eq!(char::from_u32(value_at_min), Some('\0'));
38    /// ```
39    #[stable(feature = "char_min", since = "1.83.0")]
40    pub const MIN: char = '\0';
41
42    /// The highest valid code point a `char` can have, `'\u{10FFFF}'`.
43    ///
44    /// Unlike integer types, `char` actually has a gap in the middle,
45    /// meaning that the range of possible `char`s is smaller than you
46    /// might expect. Ranges of `char` will automatically hop this gap
47    /// for you:
48    ///
49    /// ```
50    /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
51    /// let size = (char::MIN..=char::MAX).count() as u32;
52    /// assert!(size < dist);
53    /// ```
54    ///
55    /// Despite this gap, the [`MIN`] and `MAX` values can be used as bounds for
56    /// all `char` values.
57    ///
58    /// [`MIN`]: char::MIN
59    ///
60    /// # Examples
61    ///
62    /// ```
63    /// # fn something_which_returns_char() -> char { 'a' }
64    /// let c: char = something_which_returns_char();
65    /// assert!(c <= char::MAX);
66    ///
67    /// let value_at_max = u32::from(char::MAX);
68    /// assert_eq!(char::from_u32(value_at_max), Some('\u{10FFFF}'));
69    /// assert_eq!(char::from_u32(value_at_max + 1), None);
70    /// ```
71    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
72    pub const MAX: char = '\u{10FFFF}';
73
74    /// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
75    /// UTF-8 encoding.
76    #[stable(feature = "char_max_len_assoc", since = "1.93.0")]
77    pub const MAX_LEN_UTF8: usize = 4;
78
79    /// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
80    /// to UTF-16 encoding.
81    #[stable(feature = "char_max_len_assoc", since = "1.93.0")]
82    pub const MAX_LEN_UTF16: usize = 2;
83
84    /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
85    /// decoding error.
86    ///
87    /// It can occur, for example, when giving ill-formed UTF-8 bytes to
88    /// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy).
89    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
90    pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
91
92    /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
93    /// `char` and `str` methods are based on.
94    ///
95    /// New versions of Unicode are released regularly, and subsequently all methods
96    /// in the standard library depending on Unicode are updated. Therefore, the
97    /// behavior of some `char` and `str` methods, and the value of this constant,
98    /// change over time (within the boundaries of Unicode's [stability policies]).
99    /// This is *not* considered to be a breaking change.
100    ///
101    /// [stability policies]: https://www.unicode.org/policies/stability_policy.html
102    ///
103    /// The version numbering scheme is explained in
104    /// [Section 3.1 (Version Numbering)] of the Unicode Standard.
105    ///
106    /// [Section 3.1 (Version Numbering)]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49512
107    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
108    pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
109
110    /// Creates an iterator over the native endian UTF-16 encoded code points in `iter`,
111    /// returning unpaired surrogates as `Err`s.
112    ///
113    /// # Examples
114    ///
115    /// Basic usage:
116    ///
117    /// ```
118    /// // 𝄞mus<invalid>ic<invalid>
119    /// let v = [
120    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
121    /// ];
122    ///
123    /// assert_eq!(
124    ///     char::decode_utf16(v)
125    ///         .map(|r| r.map_err(|e| e.unpaired_surrogate()))
126    ///         .collect::<Vec<_>>(),
127    ///     vec![
128    ///         Ok('𝄞'),
129    ///         Ok('m'), Ok('u'), Ok('s'),
130    ///         Err(0xDD1E),
131    ///         Ok('i'), Ok('c'),
132    ///         Err(0xD834)
133    ///     ]
134    /// );
135    /// ```
136    ///
137    /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
138    ///
139    /// ```
140    /// // 𝄞mus<invalid>ic<invalid>
141    /// let v = [
142    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
143    /// ];
144    ///
145    /// assert_eq!(
146    ///     char::decode_utf16(v)
147    ///        .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER))
148    ///        .collect::<String>(),
149    ///     "𝄞mus�ic�"
150    /// );
151    /// ```
152    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
153    #[inline]
154    #[ferrocene::prevalidated]
155    pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
156        super::decode::decode_utf16(iter)
157    }
158
159    /// Converts a `u32` to a `char`.
160    ///
161    /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
162    /// [`as`](../std/keyword.as.html):
163    ///
164    /// ```
165    /// let c = '💯';
166    /// let i = c as u32;
167    ///
168    /// assert_eq!(128175, i);
169    /// ```
170    ///
171    /// However, the reverse is not true: not all valid [`u32`]s are valid
172    /// `char`s. `from_u32()` will return `None` if the input is not a valid value
173    /// for a `char`.
174    ///
175    /// For an unsafe version of this function which ignores these checks, see
176    /// [`from_u32_unchecked`].
177    ///
178    /// [`from_u32_unchecked`]: #method.from_u32_unchecked
179    ///
180    /// # Examples
181    ///
182    /// Basic usage:
183    ///
184    /// ```
185    /// let c = char::from_u32(0x2764);
186    ///
187    /// assert_eq!(Some('❤'), c);
188    /// ```
189    ///
190    /// Returning `None` when the input is not a valid `char`:
191    ///
192    /// ```
193    /// let c = char::from_u32(0x110000);
194    ///
195    /// assert_eq!(None, c);
196    /// ```
197    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
198    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
199    #[must_use]
200    #[inline]
201    pub const fn from_u32(i: u32) -> Option<char> {
202        super::convert::from_u32(i)
203    }
204
205    /// Converts a `u32` to a `char`, ignoring validity.
206    ///
207    /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
208    /// `as`:
209    ///
210    /// ```
211    /// let c = '💯';
212    /// let i = c as u32;
213    ///
214    /// assert_eq!(128175, i);
215    /// ```
216    ///
217    /// However, the reverse is not true: not all valid [`u32`]s are valid
218    /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to
219    /// `char`, possibly creating an invalid one.
220    ///
221    /// # Safety
222    ///
223    /// This function is unsafe, as it may construct invalid `char` values.
224    ///
225    /// For a safe version of this function, see the [`from_u32`] function.
226    ///
227    /// [`from_u32`]: #method.from_u32
228    ///
229    /// # Examples
230    ///
231    /// Basic usage:
232    ///
233    /// ```
234    /// let c = unsafe { char::from_u32_unchecked(0x2764) };
235    ///
236    /// assert_eq!('❤', c);
237    /// ```
238    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
239    #[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
240    #[must_use]
241    #[inline]
242    #[ferrocene::prevalidated]
243    pub const unsafe fn from_u32_unchecked(i: u32) -> char {
244        // SAFETY: the safety contract must be upheld by the caller.
245        unsafe { super::convert::from_u32_unchecked(i) }
246    }
247
248    /// Converts a digit in the given radix to a `char`.
249    ///
250    /// A 'radix' here is sometimes also called a 'base'. A radix of two
251    /// indicates a binary number, a radix of ten, decimal, and a radix of
252    /// sixteen, hexadecimal, to give some common values. Arbitrary
253    /// radices are supported.
254    ///
255    /// `from_digit()` will return `None` if the input is not a digit in
256    /// the given radix.
257    ///
258    /// # Panics
259    ///
260    /// Panics if given a radix larger than 36.
261    ///
262    /// # Examples
263    ///
264    /// Basic usage:
265    ///
266    /// ```
267    /// let c = char::from_digit(4, 10);
268    ///
269    /// assert_eq!(Some('4'), c);
270    ///
271    /// // Decimal 11 is a single digit in base 16
272    /// let c = char::from_digit(11, 16);
273    ///
274    /// assert_eq!(Some('b'), c);
275    /// ```
276    ///
277    /// Returning `None` when the input is not a digit:
278    ///
279    /// ```
280    /// let c = char::from_digit(20, 10);
281    ///
282    /// assert_eq!(None, c);
283    /// ```
284    ///
285    /// Passing a large radix, causing a panic:
286    ///
287    /// ```should_panic
288    /// // this panics
289    /// let _c = char::from_digit(1, 37);
290    /// ```
291    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
292    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
293    #[must_use]
294    #[inline]
295    pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
296        super::convert::from_digit(num, radix)
297    }
298
299    /// Checks if a `char` is a digit in the given radix.
300    ///
301    /// A 'radix' here is sometimes also called a 'base'. A radix of two
302    /// indicates a binary number, a radix of ten, decimal, and a radix of
303    /// sixteen, hexadecimal, to give some common values. Arbitrary
304    /// radices are supported.
305    ///
306    /// Compared to [`is_numeric()`], this function only recognizes the characters
307    /// `0-9`, `a-z` and `A-Z`.
308    ///
309    /// 'Digit' is defined to be only the following characters:
310    ///
311    /// * `0-9`
312    /// * `a-z`
313    /// * `A-Z`
314    ///
315    /// For a more comprehensive understanding of 'digit', see [`is_numeric()`].
316    ///
317    /// [`is_numeric()`]: #method.is_numeric
318    ///
319    /// # Panics
320    ///
321    /// Panics if given a radix smaller than 2 or larger than 36.
322    ///
323    /// # Examples
324    ///
325    /// Basic usage:
326    ///
327    /// ```
328    /// assert!('1'.is_digit(10));
329    /// assert!('f'.is_digit(16));
330    /// assert!(!'f'.is_digit(10));
331    /// ```
332    ///
333    /// Passing a large radix, causing a panic:
334    ///
335    /// ```should_panic
336    /// // this panics
337    /// '1'.is_digit(37);
338    /// ```
339    ///
340    /// Passing a small radix, causing a panic:
341    ///
342    /// ```should_panic
343    /// // this panics
344    /// '1'.is_digit(1);
345    /// ```
346    #[stable(feature = "rust1", since = "1.0.0")]
347    #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
348    #[inline]
349    pub const fn is_digit(self, radix: u32) -> bool {
350        self.to_digit(radix).is_some()
351    }
352
353    /// Converts a `char` to a digit in the given radix.
354    ///
355    /// A 'radix' here is sometimes also called a 'base'. A radix of two
356    /// indicates a binary number, a radix of ten, decimal, and a radix of
357    /// sixteen, hexadecimal, to give some common values. Arbitrary
358    /// radices are supported.
359    ///
360    /// 'Digit' is defined to be only the following characters:
361    ///
362    /// * `0-9`
363    /// * `a-z`
364    /// * `A-Z`
365    ///
366    /// # Errors
367    ///
368    /// Returns `None` if the `char` does not refer to a digit in the given radix.
369    ///
370    /// # Panics
371    ///
372    /// Panics if given a radix smaller than 2 or larger than 36.
373    ///
374    /// # Examples
375    ///
376    /// Basic usage:
377    ///
378    /// ```
379    /// assert_eq!('1'.to_digit(10), Some(1));
380    /// assert_eq!('f'.to_digit(16), Some(15));
381    /// ```
382    ///
383    /// Passing a non-digit results in failure:
384    ///
385    /// ```
386    /// assert_eq!('f'.to_digit(10), None);
387    /// assert_eq!('z'.to_digit(16), None);
388    /// ```
389    ///
390    /// Passing a large radix, causing a panic:
391    ///
392    /// ```should_panic
393    /// // this panics
394    /// let _ = '1'.to_digit(37);
395    /// ```
396    /// Passing a small radix, causing a panic:
397    ///
398    /// ```should_panic
399    /// // this panics
400    /// let _ = '1'.to_digit(1);
401    /// ```
402    #[stable(feature = "rust1", since = "1.0.0")]
403    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
404    #[rustc_diagnostic_item = "char_to_digit"]
405    #[must_use = "this returns the result of the operation, \
406                  without modifying the original"]
407    #[inline]
408    #[ferrocene::prevalidated]
409    pub const fn to_digit(self, radix: u32) -> Option<u32> {
410        assert!(
411            radix >= 2 && radix <= 36,
412            "to_digit: invalid radix -- radix must be in the range 2 to 36 inclusive"
413        );
414        // check radix to remove letter handling code when radix is a known constant
415        let value = if self > '9' && radix > 10 {
416            // mask to convert ASCII letters to uppercase
417            const TO_UPPERCASE_MASK: u32 = !0b0010_0000;
418            // Converts an ASCII letter to its corresponding integer value:
419            // A-Z => 10-35, a-z => 10-35. Other characters produce values >= 36.
420            //
421            // Add Overflow Safety:
422            // By applying the mask after the subtraction, the first addendum is
423            // constrained such that it never exceeds u32::MAX - 0x20.
424            ((self as u32).wrapping_sub('A' as u32) & TO_UPPERCASE_MASK) + 10
425        } else {
426            // convert digit to value, non-digits wrap to values > 36
427            (self as u32).wrapping_sub('0' as u32)
428        };
429        // FIXME(const-hack): once then_some is const fn, use it here
430        if value < radix { Some(value) } else { None }
431    }
432
433    /// Returns an iterator that yields the hexadecimal Unicode escape of a
434    /// character as `char`s.
435    ///
436    /// This will escape characters with the Rust syntax of the form
437    /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
438    ///
439    /// # Examples
440    ///
441    /// As an iterator:
442    ///
443    /// ```
444    /// for c in '❤'.escape_unicode() {
445    ///     print!("{c}");
446    /// }
447    /// println!();
448    /// ```
449    ///
450    /// Using `println!` directly:
451    ///
452    /// ```
453    /// println!("{}", '❤'.escape_unicode());
454    /// ```
455    ///
456    /// Both are equivalent to:
457    ///
458    /// ```
459    /// println!("\\u{{2764}}");
460    /// ```
461    ///
462    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
463    ///
464    /// ```
465    /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
466    /// ```
467    #[must_use = "this returns the escaped char as an iterator, \
468                  without modifying the original"]
469    #[stable(feature = "rust1", since = "1.0.0")]
470    #[inline]
471    #[ferrocene::prevalidated]
472    pub fn escape_unicode(self) -> EscapeUnicode {
473        EscapeUnicode::new(self)
474    }
475
476    /// An extended version of `escape_debug` that optionally permits escaping
477    /// Extended Grapheme codepoints, single quotes, and double quotes. This
478    /// allows us to format characters like nonspacing marks better when they're
479    /// at the start of a string, and allows escaping single quotes in
480    /// characters, and double quotes in strings.
481    #[inline]
482    #[ferrocene::prevalidated]
483    pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
484        match self {
485            // Special escapes
486            '\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
487            '\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
488            '\\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus),
489            '\n' => EscapeDebug::backslash(ascii::Char::SmallN),
490            '\t' => EscapeDebug::backslash(ascii::Char::SmallT),
491            '\r' => EscapeDebug::backslash(ascii::Char::SmallR),
492            '\0' => EscapeDebug::backslash(ascii::Char::Digit0),
493
494            // ASCII fast path
495            '\x20'..='\x7E' => EscapeDebug::printable(self),
496
497            _ if self.is_control()
498                || self.is_private_use()
499                || self.is_whitespace()
500                || args.escape_grapheme_extender && self.is_grapheme_extender()
501                || self.is_default_ignorable()
502                || self.is_format_control()
503                || self.is_unassigned() =>
504            {
505                EscapeDebug::unicode(self)
506            }
507
508            _ => EscapeDebug::printable(self),
509        }
510    }
511
512    /// Returns an iterator that yields the literal escape code of a character
513    /// as `char`s.
514    ///
515    /// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations
516    /// of `str` or `char`.
517    ///
518    /// # Examples
519    ///
520    /// As an iterator:
521    ///
522    /// ```
523    /// for c in '\n'.escape_debug() {
524    ///     print!("{c}");
525    /// }
526    /// println!();
527    /// ```
528    ///
529    /// Using `println!` directly:
530    ///
531    /// ```
532    /// println!("{}", '\n'.escape_debug());
533    /// ```
534    ///
535    /// Both are equivalent to:
536    ///
537    /// ```
538    /// println!("\\n");
539    /// ```
540    ///
541    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
542    ///
543    /// ```
544    /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
545    /// ```
546    #[must_use = "this returns the escaped char as an iterator, \
547                  without modifying the original"]
548    #[stable(feature = "char_escape_debug", since = "1.20.0")]
549    #[inline]
550    #[ferrocene::prevalidated]
551    pub fn escape_debug(self) -> EscapeDebug {
552        self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
553    }
554
555    /// Returns an iterator that yields the literal escape code of a character
556    /// as `char`s.
557    ///
558    /// The default is chosen with a bias toward producing literals that are
559    /// legal in a variety of languages, including C++11 and similar C-family
560    /// languages. The exact rules are:
561    ///
562    /// * Tab is escaped as `\t`.
563    /// * Carriage return is escaped as `\r`.
564    /// * Line feed is escaped as `\n`.
565    /// * Single quote is escaped as `\'`.
566    /// * Double quote is escaped as `\"`.
567    /// * Backslash is escaped as `\\`.
568    /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
569    ///   inclusive is not escaped.
570    /// * All other characters are given hexadecimal Unicode escapes; see
571    ///   [`escape_unicode`].
572    ///
573    /// [`escape_unicode`]: #method.escape_unicode
574    ///
575    /// # Examples
576    ///
577    /// As an iterator:
578    ///
579    /// ```
580    /// for c in '"'.escape_default() {
581    ///     print!("{c}");
582    /// }
583    /// println!();
584    /// ```
585    ///
586    /// Using `println!` directly:
587    ///
588    /// ```
589    /// println!("{}", '"'.escape_default());
590    /// ```
591    ///
592    /// Both are equivalent to:
593    ///
594    /// ```
595    /// println!("\\\"");
596    /// ```
597    ///
598    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
599    ///
600    /// ```
601    /// assert_eq!('"'.escape_default().to_string(), "\\\"");
602    /// ```
603    #[must_use = "this returns the escaped char as an iterator, \
604                  without modifying the original"]
605    #[stable(feature = "rust1", since = "1.0.0")]
606    #[inline]
607    #[ferrocene::prevalidated]
608    pub fn escape_default(self) -> EscapeDefault {
609        match self {
610            '\t' => EscapeDefault::backslash(ascii::Char::SmallT),
611            '\r' => EscapeDefault::backslash(ascii::Char::SmallR),
612            '\n' => EscapeDefault::backslash(ascii::Char::SmallN),
613            '\\' | '\'' | '\"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
614            '\x20'..='\x7e' => EscapeDefault::printable(self.as_ascii().unwrap()),
615            _ => EscapeDefault::unicode(self),
616        }
617    }
618
619    /// Returns the number of bytes this `char` would need if encoded in UTF-8.
620    ///
621    /// That number of bytes is always between 1 and 4, inclusive.
622    ///
623    /// # Examples
624    ///
625    /// Basic usage:
626    ///
627    /// ```
628    /// let len = 'A'.len_utf8();
629    /// assert_eq!(len, 1);
630    ///
631    /// let len = 'ß'.len_utf8();
632    /// assert_eq!(len, 2);
633    ///
634    /// let len = 'ℝ'.len_utf8();
635    /// assert_eq!(len, 3);
636    ///
637    /// let len = '💣'.len_utf8();
638    /// assert_eq!(len, 4);
639    /// ```
640    ///
641    /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
642    /// would take if each code point was represented as a `char` vs in the `&str` itself:
643    ///
644    /// ```
645    /// // as chars
646    /// let eastern = '東';
647    /// let capital = '京';
648    ///
649    /// // both can be represented as three bytes
650    /// assert_eq!(3, eastern.len_utf8());
651    /// assert_eq!(3, capital.len_utf8());
652    ///
653    /// // as a &str, these two are encoded in UTF-8
654    /// let tokyo = "東京";
655    ///
656    /// let len = eastern.len_utf8() + capital.len_utf8();
657    ///
658    /// // we can see that they take six bytes total...
659    /// assert_eq!(6, tokyo.len());
660    ///
661    /// // ... just like the &str
662    /// assert_eq!(len, tokyo.len());
663    /// ```
664    #[stable(feature = "rust1", since = "1.0.0")]
665    #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
666    #[inline]
667    #[must_use]
668    #[ferrocene::prevalidated]
669    pub const fn len_utf8(self) -> usize {
670        len_utf8(self as u32)
671    }
672
673    /// Returns the number of 16-bit code units this `char` would need if
674    /// encoded in UTF-16.
675    ///
676    /// That number of code units is always either 1 or 2, for Unicode scalar values in
677    /// the [basic multilingual plane] or [supplementary planes] respectively.
678    ///
679    /// See the documentation for [`len_utf8()`] for more explanation of this
680    /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
681    ///
682    /// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane
683    /// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes
684    /// [`len_utf8()`]: #method.len_utf8
685    ///
686    /// # Examples
687    ///
688    /// Basic usage:
689    ///
690    /// ```
691    /// let n = 'ß'.len_utf16();
692    /// assert_eq!(n, 1);
693    ///
694    /// let len = '💣'.len_utf16();
695    /// assert_eq!(len, 2);
696    /// ```
697    #[stable(feature = "rust1", since = "1.0.0")]
698    #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
699    #[inline]
700    #[must_use]
701    pub const fn len_utf16(self) -> usize {
702        len_utf16(self as u32)
703    }
704
705    /// Encodes this character as UTF-8 into the provided byte buffer,
706    /// and then returns the subslice of the buffer that contains the encoded character.
707    ///
708    /// # Panics
709    ///
710    /// Panics if the buffer is not large enough.
711    /// A buffer of length four is large enough to encode any `char`.
712    ///
713    /// # Examples
714    ///
715    /// In both of these examples, 'ß' takes two bytes to encode.
716    ///
717    /// ```
718    /// let mut b = [0; 2];
719    ///
720    /// let result = 'ß'.encode_utf8(&mut b);
721    ///
722    /// assert_eq!(result, "ß");
723    ///
724    /// assert_eq!(result.len(), 2);
725    /// ```
726    ///
727    /// A buffer that's too small:
728    ///
729    /// ```should_panic
730    /// let mut b = [0; 1];
731    ///
732    /// // this panics
733    /// 'ß'.encode_utf8(&mut b);
734    /// ```
735    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
736    #[rustc_const_stable(feature = "const_char_encode_utf8", since = "1.83.0")]
737    #[inline]
738    #[ferrocene::prevalidated]
739    pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
740        // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
741        unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
742    }
743
744    /// Encodes this character as native endian UTF-16 into the provided `u16` buffer,
745    /// and then returns the subslice of the buffer that contains the encoded character.
746    ///
747    /// # Panics
748    ///
749    /// Panics if the buffer is not large enough.
750    /// A buffer of length 2 is large enough to encode any `char`.
751    ///
752    /// # Examples
753    ///
754    /// In both of these examples, '𝕊' takes two `u16`s to encode.
755    ///
756    /// ```
757    /// let mut b = [0; 2];
758    ///
759    /// let result = '𝕊'.encode_utf16(&mut b);
760    ///
761    /// assert_eq!(result.len(), 2);
762    /// ```
763    ///
764    /// A buffer that's too small:
765    ///
766    /// ```should_panic
767    /// let mut b = [0; 1];
768    ///
769    /// // this panics
770    /// '𝕊'.encode_utf16(&mut b);
771    /// ```
772    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
773    #[rustc_const_stable(feature = "const_char_encode_utf16", since = "1.84.0")]
774    #[inline]
775    pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
776        encode_utf16_raw(self as u32, dst)
777    }
778
779    /// Returns `true` if this `char` has the `Alphabetic` property.
780    ///
781    /// `Alphabetic` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
782    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
783    ///
784    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G32524
785    /// [specified]: https://www.unicode.org/reports/tr44/#Alphabetic
786    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
787    ///
788    /// # Examples
789    ///
790    /// Basic usage:
791    ///
792    /// ```
793    /// assert!('a'.is_alphabetic());
794    /// assert!('京'.is_alphabetic());
795    ///
796    /// let c = '💝';
797    /// // love is many things, but it is not alphabetic
798    /// assert!(!c.is_alphabetic());
799    /// ```
800    #[must_use]
801    #[stable(feature = "rust1", since = "1.0.0")]
802    #[inline]
803    pub fn is_alphabetic(self) -> bool {
804        match self {
805            'a'..='z' | 'A'..='Z' => true,
806            '\0'..='\u{A9}' => false,
807            _ => unicode::Alphabetic(self),
808        }
809    }
810
811    /// Returns `true` if this `char` has the `Cased` property.
812    /// A character is cased if and only if it is uppercase, lowercase, or titlecase.
813    ///
814    /// `Cased` is [described] in Chapter 3 (Conformance) of the Unicode Standard and
815    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
816    ///
817    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G44595
818    /// [specified]: https://www.unicode.org/reports/tr44/#Cased
819    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
820    ///
821    /// # Examples
822    ///
823    /// Basic usage:
824    ///
825    /// ```
826    /// #![feature(titlecase)]
827    /// assert!('A'.is_cased());
828    /// assert!('a'.is_cased());
829    /// assert!(!'京'.is_cased());
830    /// ```
831    #[must_use]
832    #[unstable(feature = "titlecase", issue = "153892")]
833    #[inline]
834    pub fn is_cased(self) -> bool {
835        match self {
836            'a'..='z' | 'A'..='Z' => true,
837            '\0'..='\u{A9}' => false,
838            _ => unicode::Lowercase(self) || unicode::Uppercase(self) || unicode::Lt(self),
839        }
840    }
841
842    /// Returns the case of this character:
843    /// [`Some(CharCase::Upper)`][`CharCase::Upper`] if [`self.is_uppercase()`][`char::is_uppercase`],
844    /// [`Some(CharCase::Lower)`][`CharCase::Lower`] if [`self.is_lowercase()`][`char::is_lowercase`],
845    /// [`Some(CharCase::Title)`][`CharCase::Title`] if [`self.is_titlecase()`][`char::is_titlecase`], and
846    /// `None` if [`!self.is_cased()`][`char::is_cased`].
847    ///
848    /// # Examples
849    ///
850    /// ```
851    /// #![feature(titlecase)]
852    /// use core::char::CharCase;
853    /// assert_eq!('a'.case(), Some(CharCase::Lower));
854    /// assert_eq!('δ'.case(), Some(CharCase::Lower));
855    /// assert_eq!('A'.case(), Some(CharCase::Upper));
856    /// assert_eq!('Δ'.case(), Some(CharCase::Upper));
857    /// assert_eq!('Dž'.case(), Some(CharCase::Title));
858    /// assert_eq!('中'.case(), None);
859    /// ```
860    #[must_use]
861    #[unstable(feature = "titlecase", issue = "153892")]
862    #[inline]
863    pub fn case(self) -> Option<CharCase> {
864        match self {
865            'a'..='z' => Some(CharCase::Lower),
866            'A'..='Z' => Some(CharCase::Upper),
867            '\0'..='\u{A9}' => None,
868            _ if unicode::Lowercase(self) => Some(CharCase::Lower),
869            _ if unicode::Uppercase(self) => Some(CharCase::Upper),
870            _ if unicode::Lt(self) => Some(CharCase::Title),
871            _ => None,
872        }
873    }
874
875    /// Returns `true` if this `char` has the `Lowercase` property.
876    ///
877    /// `Lowercase` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
878    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
879    ///
880    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
881    /// [specified]: https://www.unicode.org/reports/tr44/#Lowercase
882    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
883    ///
884    /// # Examples
885    ///
886    /// Basic usage:
887    ///
888    /// ```
889    /// assert!('a'.is_lowercase());
890    /// assert!('δ'.is_lowercase());
891    /// assert!(!'A'.is_lowercase());
892    /// assert!(!'Δ'.is_lowercase());
893    ///
894    /// // The various Chinese scripts and punctuation do not have case, and so:
895    /// assert!(!'中'.is_lowercase());
896    /// assert!(!' '.is_lowercase());
897    /// ```
898    ///
899    /// In a const context:
900    ///
901    /// ```
902    /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
903    /// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
904    /// ```
905    #[must_use]
906    #[stable(feature = "rust1", since = "1.0.0")]
907    #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
908    #[inline]
909    pub const fn is_lowercase(self) -> bool {
910        match self {
911            'a'..='z' => true,
912            '\0'..='\u{A9}' => false,
913            _ => unicode::Lowercase(self),
914        }
915    }
916
917    /// Returns `true` if this `char` is in the general category for titlecase letters.
918    /// Conceptually, these characters consist of an uppercase portion followed by a lowercase portion.
919    ///
920    /// Titlecase letters (code points with the general category of `Lt`) are [described] in Chapter 4
921    /// (Character Properties) of the Unicode Standard, and [specified] in the Unicode Character
922    /// Database [`UnicodeData.txt`].
923    ///
924    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G124722
925    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
926    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
927    ///
928    /// # Examples
929    ///
930    /// Basic usage:
931    ///
932    /// ```
933    /// #![feature(titlecase)]
934    /// assert!('Dž'.is_titlecase());
935    /// assert!('ῼ'.is_titlecase());
936    /// assert!(!'D'.is_titlecase());
937    /// assert!(!'z'.is_titlecase());
938    /// assert!(!'中'.is_titlecase());
939    /// assert!(!' '.is_titlecase());
940    /// ```
941    #[must_use]
942    #[unstable(feature = "titlecase", issue = "153892")]
943    #[inline]
944    pub fn is_titlecase(self) -> bool {
945        match self {
946            '\0'..='\u{01C4}' => false,
947            _ => unicode::Lt(self),
948        }
949    }
950
951    /// Returns `true` if this `char` has the `Uppercase` property.
952    ///
953    /// `Uppercase` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
954    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
955    ///
956    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
957    /// [specified]: https://www.unicode.org/reports/tr44/#Uppercase
958    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
959    ///
960    /// # Examples
961    ///
962    /// Basic usage:
963    ///
964    /// ```
965    /// assert!(!'a'.is_uppercase());
966    /// assert!(!'δ'.is_uppercase());
967    /// assert!('A'.is_uppercase());
968    /// assert!('Δ'.is_uppercase());
969    ///
970    /// // The various Chinese scripts and punctuation do not have case, and so:
971    /// assert!(!'中'.is_uppercase());
972    /// assert!(!' '.is_uppercase());
973    /// ```
974    ///
975    /// In a const context:
976    ///
977    /// ```
978    /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
979    /// assert!(CAPITAL_DELTA_IS_UPPERCASE);
980    /// ```
981    #[must_use]
982    #[stable(feature = "rust1", since = "1.0.0")]
983    #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
984    #[inline]
985    pub const fn is_uppercase(self) -> bool {
986        match self {
987            'A'..='Z' => true,
988            '\0'..='\u{BF}' => false,
989            _ => unicode::Uppercase(self),
990        }
991    }
992
993    /// Returns `true` if this `char` has one of the general categories for numbers.
994    ///
995    /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
996    /// characters, and `No` for other numeric characters) are [specified] in the Unicode Character
997    /// Database [`UnicodeData.txt`].
998    ///
999    /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'.
1000    /// If you want everything including characters with overlapping purposes, then you might want to use
1001    /// a Unicode or language-processing library that exposes the appropriate character properties
1002    /// (e.g. [`Numeric_Type`]) instead of looking at the Unicode categories.
1003    ///
1004    /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
1005    /// `is_ascii_digit` or `is_digit` instead.
1006    ///
1007    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1008    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1009    /// [`Numeric_Type`]: https://www.unicode.org/reports/tr44/#Numeric_Type
1010    ///
1011    /// # Examples
1012    ///
1013    /// Basic usage:
1014    ///
1015    /// ```
1016    /// assert!('٣'.is_numeric());
1017    /// assert!('7'.is_numeric());
1018    /// assert!('৬'.is_numeric());
1019    /// assert!('¾'.is_numeric());
1020    /// assert!('①'.is_numeric());
1021    /// assert!(!'K'.is_numeric());
1022    /// assert!(!'و'.is_numeric());
1023    /// assert!(!'藏'.is_numeric());
1024    /// assert!(!'三'.is_numeric());
1025    /// ```
1026    #[must_use]
1027    #[stable(feature = "rust1", since = "1.0.0")]
1028    #[inline]
1029    pub fn is_numeric(self) -> bool {
1030        match self {
1031            '0'..='9' => true,
1032            '\0'..='\u{B1}' => false,
1033            _ => unicode::N(self),
1034        }
1035    }
1036
1037    /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
1038    ///
1039    /// [`is_alphabetic()`]: Self::is_alphabetic
1040    /// [`is_numeric()`]: Self::is_numeric
1041    ///
1042    /// # Examples
1043    ///
1044    /// Basic usage:
1045    ///
1046    /// ```
1047    /// assert!('٣'.is_alphanumeric());
1048    /// assert!('7'.is_alphanumeric());
1049    /// assert!('৬'.is_alphanumeric());
1050    /// assert!('¾'.is_alphanumeric());
1051    /// assert!('①'.is_alphanumeric());
1052    /// assert!('K'.is_alphanumeric());
1053    /// assert!('و'.is_alphanumeric());
1054    /// assert!('藏'.is_alphanumeric());
1055    /// ```
1056    #[must_use]
1057    #[stable(feature = "rust1", since = "1.0.0")]
1058    #[inline]
1059    pub fn is_alphanumeric(self) -> bool {
1060        match self {
1061            'a'..='z' | 'A'..='Z' | '0'..='9' => true,
1062            '\0'..='\u{A9}' => false,
1063            _ => unicode::Alphabetic(self) || unicode::N(self),
1064        }
1065    }
1066
1067    /// Returns `true` if this `char` has the `White_Space` property.
1068    ///
1069    /// `White_Space` is [specified] in the Unicode Character Database [`PropList.txt`].
1070    ///
1071    /// [specified]: https://www.unicode.org/reports/tr44/#White_Space
1072    /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
1073    ///
1074    /// # Examples
1075    ///
1076    /// Basic usage:
1077    ///
1078    /// ```
1079    /// assert!(' '.is_whitespace());
1080    ///
1081    /// // line break
1082    /// assert!('\n'.is_whitespace());
1083    ///
1084    /// // a non-breaking space
1085    /// assert!('\u{A0}'.is_whitespace());
1086    ///
1087    /// assert!(!'越'.is_whitespace());
1088    /// ```
1089    #[must_use]
1090    #[stable(feature = "rust1", since = "1.0.0")]
1091    #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
1092    #[inline]
1093    #[ferrocene::prevalidated]
1094    pub const fn is_whitespace(self) -> bool {
1095        match self {
1096            ' ' | '\x09'..='\x0d' => true,
1097            '\0'..='\u{84}' => false,
1098            _ => unicode::White_Space(self),
1099        }
1100    }
1101
1102    /// Returns `true` if this `char` has the general category for control codes.
1103    ///
1104    /// Control codes (code points with the general category of `Cc`) are [described] in Chapter 23
1105    /// (Special Areas and Format Characters) of the Unicode Standard, and [specified] in the Unicode Character
1106    /// Database [`UnicodeData.txt`]. The full set of Unicode control codes is
1107    /// `'\0'..='\x1f' | '\x7f'..='\u{9f}'`, and will never change.
1108    ///
1109    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-23/#G20365
1110    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1111    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1112    ///
1113    /// # Examples
1114    ///
1115    /// Basic usage:
1116    ///
1117    /// ```
1118    /// assert!('\t'.is_control());
1119    /// assert!('\n'.is_control());
1120    /// assert!('\u{9C}'.is_control()); // STRING TERMINATOR
1121    /// assert!(!'q'.is_control());
1122    /// ```
1123    #[ferrocene::prevalidated]
1124    #[must_use]
1125    #[stable(feature = "rust1", since = "1.0.0")]
1126    #[rustc_const_stable(feature = "const_is_control", since = "1.97.0")]
1127    #[inline]
1128    pub const fn is_control(self) -> bool {
1129        // According to
1130        // https://www.unicode.org/policies/stability_policy.html#Property_Value,
1131        // the set of codepoints in `Cc` will never change.
1132        // So we can just hard-code the patterns to match against instead of using a table.
1133        matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
1134    }
1135
1136    /// Returns `true` if this `char` has the general category for [private-use characters].
1137    /// These characters do not have an interpretation specified by Unicode; individual programs
1138    /// and users are free to assign them whatever meaning they like.
1139    ///
1140    /// [private-use characters]: https://www.unicode.org/faq/private_use#private_use
1141    ///
1142    /// Private-use characters (code points with the general category of `Co`) are [described] in Chapter 23
1143    /// (Special Areas and Format Characters) of the Unicode Standard, and [specified] in the
1144    /// Unicode Character Database [`UnicodeData.txt`]. The full set of private-use characters is
1145    /// `'\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}'`,
1146    /// and will never change.
1147    ///
1148    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-23/#G19184
1149    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1150    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1151    ///
1152    #[ferrocene::prevalidated]
1153    #[must_use]
1154    #[inline]
1155    const fn is_private_use(self) -> bool {
1156        // According to
1157        // https://www.unicode.org/policies/stability_policy.html#Property_Value,
1158        // the set of codepoints in `Co` will never change.
1159        // So we can just hard-code the patterns to match against instead of using a table.
1160        matches!(self, '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}')
1161    }
1162
1163    /// Returns `true` if this `char` has the general category for format control characters.
1164    ///
1165    /// Format controls (code points with the general category of `Cf`) are [described] in Chapter 4
1166    /// (Character Properties) of the Unicode Standard, and [specified] in the Unicode Character
1167    /// Database [`UnicodeData.txt`].
1168    ///
1169    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G134153
1170    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1171    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1172    ///
1173    /// # Examples
1174    ///
1175    /// Basic usage:
1176    ///
1177    /// ```ignore(private)
1178    /// assert!('\u{AD}'.is_format_control()); // SOFT HYPHEN
1179    /// assert!('\u{200B}'.is_format_control()); // ZERO WIDTH SPACE
1180    /// assert!('\u{E0041}'.is_format_control()); // TAG LATIN CAPITAL LETTER A
1181    /// assert!('۝'.is_format_control()); // ARABIC END OF AYAH
1182    /// assert!('𓐲'.is_format_control()); // EGYPTIAN HIEROGLYPH INSERT AT TOP START
1183    /// assert!(!'q'.is_format_control());
1184    /// ```
1185    #[ferrocene::prevalidated]
1186    #[must_use]
1187    #[inline]
1188    fn is_format_control(self) -> bool {
1189        self > '\u{AC}' && unicode::Cf(self)
1190    }
1191
1192    /// Returns `true` if this `char` has not yet been assigned a meaning by Unicode, as of
1193    /// [`UNICODE_VERSION`].
1194    ///
1195    /// [`UNICODE_VERSION`]: Self::UNICODE_VERSION
1196    ///
1197    /// These characters may have a meaning assigned in the future,
1198    /// except for the 66 [noncharacters] which will never be assigned a meaning.
1199    ///
1200    /// [noncharacters]: https://www.unicode.org/faq/private_use#noncharacters
1201    ///
1202    /// Many of Unicode's [stability policies] apply only to assigned characters.
1203    ///
1204    /// [stability policies]: https://www.unicode.org/policies/stability_policy.html
1205    ///
1206    /// Unassigned characters (code points with the general category of `Cn`) are [described] in Chapter 4
1207    /// (Character Properties) of the Unicode Standard, and [specified] in the Unicode Character Database
1208    /// by their exclusion from [`UnicodeData.txt`].
1209    ///
1210    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G134153
1211    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1212    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1213    ///
1214    /// # Examples
1215    ///
1216    /// Basic usage:
1217    ///
1218    /// ```ignore(private)
1219    /// assert!('\u{FFFE}'.is_unassigned()); // noncharacter, will never be assigned
1220    ///
1221    /// //assert!('\u{7AAAA}'.is_unassigned()); // not currently assigned, but may be in the future,
1222    ///                                         // so we shouldn't rely on the current status
1223    ///
1224    /// assert!(!'γ'.is_unassigned()); // once a character is assigned, it stays assigned forever
1225    /// ```
1226    #[ferrocene::prevalidated]
1227    #[must_use]
1228    #[inline]
1229    fn is_unassigned(self) -> bool {
1230        match self {
1231            '\0'..='\u{377}' => false,
1232            '\u{378}'..='\u{3FFFD}' => unicode::Cn_planes_0_3(self),
1233            // Assigned character ranges in planes 4 and above.
1234            // `src/tools/unicode-table-generator/src/main.rs` asserts that this is correct
1235            '\u{E0001}'
1236            | '\u{E0020}'..='\u{E007F}'
1237            | '\u{E0100}'..='\u{E01EF}'
1238            | '\u{F0000}'..='\u{FFFFD}'
1239            | '\u{100000}'..='\u{10FFFD}' => false,
1240            _ => true,
1241        }
1242    }
1243
1244    /// Returns `true` if this `char` has the `Default_Ignorable_Code_Point` property.
1245    /// These characters [should be displayed as invisible in fallback rendering](https://www.unicode.org/faq/unsup_char#3).
1246    ///
1247    /// `Default_Ignorable_Code_Point` is [described] in Chapter 5 (Implementation Guidelines) of the Unicode Standard,
1248    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1249    ///
1250    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-5/#G40120
1251    /// [specified]: https://www.unicode.org/reports/tr44/#Default_Ignorable_Code_Point
1252    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1253    ///
1254    /// # Examples
1255    ///
1256    /// Basic usage:
1257    ///
1258    /// ```ignore(private)
1259    /// assert!('\u{AD}'.is_default_ignorable()); // SOFT HYPHEN
1260    /// assert!('\u{115F}'.is_default_ignorable()); // HANGUL CHOSEONG FILLER
1261    /// assert!('\u{200B}'.is_default_ignorable()); // ZERO WIDTH SPACE
1262    /// assert!('\u{E0041}'.is_default_ignorable()); // TAG LATIN CAPITAL LETTER A
1263    /// assert!(!'۝'.is_default_ignorable()); // ARABIC END OF AYAH
1264    /// assert!(!'𓐲'.is_default_ignorable()); // EGYPTIAN HIEROGLYPH INSERT AT TOP START
1265    /// assert!(!' '.is_default_ignorable());
1266    /// assert!(!'\n'.is_default_ignorable());
1267    /// assert!(!'\0'.is_default_ignorable());
1268    /// assert!(!'q'.is_default_ignorable());
1269    #[ferrocene::prevalidated]
1270    #[must_use]
1271    #[inline]
1272    fn is_default_ignorable(self) -> bool {
1273        self > '\u{AC}' && unicode::Default_Ignorable_Code_Point(self)
1274    }
1275
1276    /// Returns `true` if this `char` has the `Grapheme_Extend` property.
1277    ///
1278    /// `Grapheme_Extend` is [described] in Chapter 3 (Conformance) of the Unicode Standard,
1279    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1280    ///
1281    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G41165
1282    /// [specified]: https://www.unicode.org/reports/tr44/#Grapheme_Extend
1283    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1284    #[ferrocene::prevalidated]
1285    #[must_use]
1286    #[inline]
1287    fn is_grapheme_extender(self) -> bool {
1288        self > '\u{02FF}' && unicode::Grapheme_Extend(self)
1289    }
1290
1291    /// Returns `true` if this `char` has the `Case_Ignorable` property. This narrow-use property
1292    /// is used to implement context-dependent casing for the Greek letter sigma (uppercase 'Σ'),
1293    /// which has two lowercase forms.
1294    ///
1295    /// `Case_Ignorable` is [described] in Chapter 3 (Conformance) of the Unicode Core Specification,
1296    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1297    /// See those resources, as well as [`to_lowercase()`]'s documentation, for more information.
1298    ///
1299    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
1300    /// [specified]: https://www.unicode.org/reports/tr44/#Case_Ignorable
1301    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1302    /// [`to_lowercase()`]: Self::to_lowercase()
1303    #[must_use]
1304    #[inline]
1305    #[unstable(feature = "case_ignorable", issue = "154848")]
1306    pub fn is_case_ignorable(self) -> bool {
1307        if self.is_ascii() {
1308            matches!(self, '\'' | '.' | ':' | '^' | '`')
1309        } else {
1310            unicode::Case_Ignorable(self)
1311        }
1312    }
1313
1314    /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
1315    /// `char`s.
1316    ///
1317    /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
1318    ///
1319    /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
1320    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1321    ///
1322    /// [ucd]: https://www.unicode.org/reports/tr44/
1323    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1324    ///
1325    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1326    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1327    ///
1328    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1329    /// is independent of context and language. See [below](#notes-on-context-and-locale)
1330    /// for more information.
1331    ///
1332    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1333    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1334    ///
1335    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1336    ///
1337    /// # Examples
1338    ///
1339    /// As an iterator:
1340    ///
1341    /// ```
1342    /// for c in 'İ'.to_lowercase() {
1343    ///     print!("{c}");
1344    /// }
1345    /// println!();
1346    /// ```
1347    ///
1348    /// Using `println!` directly:
1349    ///
1350    /// ```
1351    /// println!("{}", 'İ'.to_lowercase());
1352    /// ```
1353    ///
1354    /// Both are equivalent to:
1355    ///
1356    /// ```
1357    /// println!("i\u{307}");
1358    /// ```
1359    ///
1360    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1361    ///
1362    /// ```
1363    /// assert_eq!('C'.to_lowercase().to_string(), "c");
1364    ///
1365    /// // Sometimes the result is more than one character:
1366    /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
1367    ///
1368    /// // Characters that do not have both uppercase and lowercase
1369    /// // convert into themselves.
1370    /// assert_eq!('山'.to_lowercase().to_string(), "山");
1371    /// ```
1372    /// # Notes on context and locale
1373    ///
1374    /// As stated earlier, this method does not take into account language or context.
1375    /// Below is a non-exhaustive list of situations where this can be relevant.
1376    /// If you need to handle locale-dependent casing in your code, consider using
1377    /// an external crate, like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1378    /// which is developed by Unicode.
1379    ///
1380    /// ## Greek sigma
1381    ///
1382    /// In Greek, the letter sigma (uppercase 'Σ') has two lowercase forms:
1383    /// 'σ' which is used in most situations, and 'ς' which appears only
1384    /// at the end of a word. [`char::to_lowercase()`] always uses the first form:
1385    ///
1386    /// ```
1387    /// assert_eq!('Σ'.to_lowercase().to_string(), "σ");
1388    /// ```
1389    ///
1390    /// `str::to_lowercase()` (only available with the `alloc` crate)
1391    /// *does* properly handle this contextual mapping,
1392    /// so prefer using that method if you can. Alternatively, you can use
1393    /// [`is_cased()`] and [`is_case_ignorable()`] to implement it yourself.
1394    /// See `Final_Sigma` in [Table 3.17] of the Unicode Standard,
1395    /// along with [`SpecialCasing.txt`], for more details.
1396    ///
1397    /// [`is_cased()`]: Self::is_cased()
1398    /// [`is_case_ignorable()`]: Self::is_case_ignorable()
1399    /// [Table 3.17]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G54277
1400    ///
1401    /// ## Turkish and Azeri I/ı/İ/i
1402    ///
1403    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1404    ///
1405    /// * 'Dotless': I / ı, sometimes written ï
1406    /// * 'Dotted': İ / i
1407    ///
1408    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
1409    ///
1410    /// ```
1411    /// let lower_i = 'I'.to_lowercase().to_string();
1412    /// ```
1413    ///
1414    /// `'I'`'s correct lowercase relies on the language of the text: if we're
1415    /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
1416    /// be `"ı"`. `to_lowercase()` does not take this into account, and so:
1417    ///
1418    /// ```
1419    /// let lower_i = 'I'.to_lowercase().to_string();
1420    ///
1421    /// assert_eq!(lower_i, "i");
1422    /// ```
1423    ///
1424    /// holds across languages.
1425    ///
1426    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1427    #[must_use = "this returns the lowercased character as a new iterator, \
1428                  without modifying the original"]
1429    #[stable(feature = "rust1", since = "1.0.0")]
1430    #[inline]
1431    pub fn to_lowercase(self) -> ToLowercase {
1432        ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
1433    }
1434
1435    /// Returns an iterator that yields the titlecase mapping of this `char` as one or more
1436    /// `char`s.
1437    ///
1438    /// This is usually, but not always, equivalent to the uppercase mapping
1439    /// returned by [`to_uppercase()`]. Prefer this method when seeking to capitalize
1440    /// Only The First Letter of a word, but use [`to_uppercase()`] for ALL CAPS.
1441    /// See [below](#difference-from-uppercase) for a thorough explanation
1442    /// of the difference between the two methods.
1443    ///
1444    /// If this `char` does not have a titlecase mapping, the iterator yields the same `char`.
1445    ///
1446    /// If this `char` has a one-to-one titlecase mapping given by the [Unicode Character
1447    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1448    ///
1449    /// [ucd]: https://www.unicode.org/reports/tr44/
1450    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1451    ///
1452    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1453    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1454    ///
1455    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1456    ///
1457    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1458    /// is independent of context and language. See [below](#note-on-locale)
1459    /// for more information.
1460    ///
1461    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1462    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1463    ///
1464    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1465    ///
1466    /// # Examples
1467    ///
1468    /// As an iterator:
1469    ///
1470    /// ```
1471    /// #![feature(titlecase)]
1472    /// for c in 'ß'.to_titlecase() {
1473    ///     print!("{c}");
1474    /// }
1475    /// println!();
1476    /// ```
1477    ///
1478    /// Using `println!` directly:
1479    ///
1480    /// ```
1481    /// #![feature(titlecase)]
1482    /// println!("{}", 'ß'.to_titlecase());
1483    /// ```
1484    ///
1485    /// Both are equivalent to:
1486    ///
1487    /// ```
1488    /// println!("Ss");
1489    /// ```
1490    ///
1491    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1492    ///
1493    /// ```
1494    /// #![feature(titlecase)]
1495    /// assert_eq!('c'.to_titlecase().to_string(), "C");
1496    /// assert_eq!('ა'.to_titlecase().to_string(), "ა");
1497    /// assert_eq!('dž'.to_titlecase().to_string(), "Dž");
1498    /// assert_eq!('ᾨ'.to_titlecase().to_string(), "ᾨ");
1499    ///
1500    /// // Sometimes the result is more than one character:
1501    /// assert_eq!('ß'.to_titlecase().to_string(), "Ss");
1502    ///
1503    /// // Characters that do not have separate cased forms
1504    /// // convert into themselves.
1505    /// assert_eq!('山'.to_titlecase().to_string(), "山");
1506    /// ```
1507    ///
1508    /// # Difference from uppercase
1509    ///
1510    /// Currently, there are three classes of characters where [`to_uppercase()`]
1511    /// and `to_titlecase()` give different results:
1512    ///
1513    /// ## Georgian script
1514    ///
1515    /// Each letter in the modern Georgian alphabet can be written in one of two forms:
1516    /// the typical lowercase-like "mkhedruli" form, and a variant uppercase-like "mtavruli"
1517    /// form. However, unlike uppercase in most cased scripts, mtavruli is not typically used
1518    /// to start sentences, denote proper nouns, or for any other purpose
1519    /// in running text. It is instead confined to titles and headings, which are written entirely
1520    /// in mtavruli. For this reason, [`to_uppercase()`] applied to a Georgian letter
1521    /// will return the mtavruli form, but `to_titlecase()` will return the mkhedruli form.
1522    ///
1523    /// ```
1524    /// #![feature(titlecase)]
1525    /// let ani = 'ა'; // First letter of the Georgian alphabet, in mkhedruli form
1526    ///
1527    /// // Titlecasing mkhedruli maps it to itself...
1528    /// assert_eq!(ani.to_titlecase().to_string(), ani.to_string());
1529    ///
1530    /// // but uppercasing it maps it to mtavruli
1531    /// assert_eq!(ani.to_uppercase().to_string(), "Ა");
1532    /// ```
1533    ///
1534    /// ## Compatibility digraphs for Latin-alphabet Serbo-Croatian
1535    ///
1536    /// The standard Latin alphabet for the Serbo-Croatian language
1537    /// (Bosnian, Croatian, Montenegrin, and Serbian) contains
1538    /// three digraphs: Dž, Lj, and Nj. These are usually represented as
1539    /// two characters. However, for compatibility with older character sets,
1540    /// Unicode includes single-character versions of these digraphs.
1541    /// Each has an uppercase, titlecase, and lowercase version:
1542    ///
1543    /// - `'DŽ'`, `'Dž'`, `'dž'`
1544    /// - `'LJ'`, `'Lj'`, `'lj'`
1545    /// - `'NJ'`, `'Nj'`, `'nj'`
1546    ///
1547    /// Unicode additionally encodes a casing triad for the Dz digraph
1548    /// without the caron: `'DZ'`, `'Dz'`, `'dz'`.
1549    ///
1550    /// ## Iota-subscripted Greek vowels
1551    ///
1552    /// In ancient Greek, the long vowels alpha (α), eta (η), and omega (ω)
1553    /// were sometimes followed by an iota (ι), forming a diphthong. Over time,
1554    /// the diphthong pronunciation was slowly lost, with the iota becoming mute.
1555    /// Eventually, the ι disappeared from the spelling as well.
1556    /// However, there remains a need to represent ancient texts faithfully.
1557    ///
1558    /// Modern editions of ancient Greek texts commonly use a reduced-sized
1559    /// ι symbol to denote mute iotas, while distinguishing them from ιs
1560    /// which continued to affect pronunciation. The exact standard differs
1561    /// between different publications. Some render the mute ι below its associated
1562    /// vowel (subscript), while others place it to the right of said vowel (adscript).
1563    /// The interaction of mute ι symbols with casing also varies.
1564    ///
1565    /// The Unicode Standard, for its default casing rules, chose to make lowercase
1566    /// Greek vowels with iota subscript (e.g. `'ᾠ'`) titlecase to the uppercase vowel
1567    /// with iota subscript (`'ᾨ'`) but uppercase to the uppercase vowel followed by
1568    /// full-size uppercase iota (`"ὨΙ"`). This is just one convention among many
1569    /// in common use, but it is the one Unicode settled on,
1570    /// so it is what this method does also.
1571    ///
1572    /// # Note on locale
1573    ///
1574    /// As stated above, this method is locale-insensitive.
1575    /// If you need locale support, consider using an external crate,
1576    /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1577    /// which is developed by Unicode. A description of one common
1578    /// locale-dependent casing issue follows (there are others):
1579    ///
1580    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1581    ///
1582    /// * 'Dotless': I / ı, sometimes written ï
1583    /// * 'Dotted': İ / i
1584    ///
1585    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
1586    ///
1587    /// ```
1588    /// #![feature(titlecase)]
1589    /// let upper_i = 'i'.to_titlecase().to_string();
1590    /// ```
1591    ///
1592    /// `'i'`'s correct titlecase relies on the language of the text: if we're
1593    /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
1594    /// be `"İ"`. `to_titlecase()` does not take this into account, and so:
1595    ///
1596    /// ```
1597    /// #![feature(titlecase)]
1598    /// let upper_i = 'i'.to_titlecase().to_string();
1599    ///
1600    /// assert_eq!(upper_i, "I");
1601    /// ```
1602    ///
1603    /// holds across languages.
1604    ///
1605    /// [`to_uppercase()`]: Self::to_uppercase()
1606    #[must_use = "this returns the titlecased character as a new iterator, \
1607                  without modifying the original"]
1608    #[unstable(feature = "titlecase", issue = "153892")]
1609    #[inline]
1610    pub fn to_titlecase(self) -> ToTitlecase {
1611        ToTitlecase(CaseMappingIter::new(conversions::to_title(self)))
1612    }
1613
1614    /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
1615    /// `char`s.
1616    ///
1617    /// Prefer this method when converting a word into ALL CAPS, but consider [`to_titlecase()`]
1618    /// instead if you seek to capitalize Only The First Letter. See that method's documentation
1619    /// for more information on the difference between the two.
1620    ///
1621    /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
1622    ///
1623    /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
1624    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1625    ///
1626    /// [ucd]: https://www.unicode.org/reports/tr44/
1627    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1628    ///
1629    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1630    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1631    ///
1632    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1633    ///
1634    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1635    /// is independent of context and language. See [below](#note-on-locale)
1636    /// for more information.
1637    ///
1638    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1639    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1640    ///
1641    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1642    ///
1643    /// # Examples
1644    ///
1645    /// `'ſt'` (U+FB05) is a single Unicode code point (a ligature) that maps to "ST" in uppercase.
1646    ///
1647    /// As an iterator:
1648    ///
1649    /// ```
1650    /// for c in 'ſt'.to_uppercase() {
1651    ///     print!("{c}");
1652    /// }
1653    /// println!();
1654    /// ```
1655    ///
1656    /// Using `println!` directly:
1657    ///
1658    /// ```
1659    /// println!("{}", 'ſt'.to_uppercase());
1660    /// ```
1661    ///
1662    /// Both are equivalent to:
1663    ///
1664    /// ```
1665    /// println!("ST");
1666    /// ```
1667    ///
1668    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1669    ///
1670    /// ```
1671    /// assert_eq!('c'.to_uppercase().to_string(), "C");
1672    /// assert_eq!('ა'.to_uppercase().to_string(), "Ა");
1673    /// assert_eq!('dž'.to_uppercase().to_string(), "DŽ");
1674    ///
1675    /// // Sometimes the result is more than one character:
1676    /// assert_eq!('ſt'.to_uppercase().to_string(), "ST");
1677    /// assert_eq!('ᾨ'.to_uppercase().to_string(), "ὨΙ");
1678    ///
1679    /// // Characters that do not have both uppercase and lowercase
1680    /// // convert into themselves.
1681    /// assert_eq!('山'.to_uppercase().to_string(), "山");
1682    /// ```
1683    ///
1684    /// # Note on locale
1685    ///
1686    /// As stated above, this method is locale-insensitive.
1687    /// If you need locale support, consider using an external crate,
1688    /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1689    /// which is developed by Unicode. A description of one common
1690    /// locale-dependent casing issue follows (there are others):
1691    ///
1692    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1693    ///
1694    /// * 'Dotless': I / ı, sometimes written ï
1695    /// * 'Dotted': İ / i
1696    ///
1697    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
1698    ///
1699    /// ```
1700    /// let upper_i = 'i'.to_uppercase().to_string();
1701    /// ```
1702    ///
1703    /// `'i'`'s correct uppercase relies on the language of the text: if we're
1704    /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
1705    /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
1706    ///
1707    /// ```
1708    /// let upper_i = 'i'.to_uppercase().to_string();
1709    ///
1710    /// assert_eq!(upper_i, "I");
1711    /// ```
1712    ///
1713    /// holds across languages.
1714    ///
1715    /// [`to_titlecase()`]: Self::to_titlecase()
1716    #[must_use = "this returns the uppercased character as a new iterator, \
1717                  without modifying the original"]
1718    #[stable(feature = "rust1", since = "1.0.0")]
1719    #[inline]
1720    pub fn to_uppercase(self) -> ToUppercase {
1721        ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
1722    }
1723
1724    /// Returns an iterator that yields the case folding of this `char` as one or more
1725    /// `char`s.
1726    ///
1727    /// Case folding is meant to be used when performing case-insensitive string comparisons.
1728    /// Case-folded strings should not usually be exposed directly to users. For most,
1729    /// but not all, characters, the casefold mapping is identical to the lowercase one.
1730    ///
1731    /// This iterator yields the `char`(s) in the common or full case folding for this `char`,
1732    /// as given by the [Unicode Character Database][ucd] [`CaseFolding.txt`].
1733    /// The maximum number of `char`s in a case folding is 3.
1734    ///
1735    /// [ucd]: https://www.unicode.org/reports/tr44/
1736    /// [`CaseFolding.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
1737    ///
1738    ///
1739    /// No [normalization] (e.g. NFC) is performed, so visually and semantically identical characters
1740    /// might still casefold differently. For example, `'ά'` (U+03AC GREEK SMALL LETTER ALPHA WITH TONOS)
1741    /// is considered distinct from `'ά'` (U+1F71 GREEK SMALL LETTER ALPHA WITH OXIA),
1742    /// even though Unicode considers them canonically equivalent.
1743    ///
1744    /// In addition, this method is independent of language/locale,
1745    /// so the special behavior of I/ı/İ/i in Turkish and Azeri is not handled.
1746    ///
1747    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case folding in
1748    /// general and Chapter 3 (Conformance) discusses the default algorithm for case folding.
1749    ///
1750    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1751    ///
1752    /// # Examples
1753    ///
    /// The German sharp S `'ß'` (U+00DF) is a single Unicode code point
    /// that casefolds to `"ss"`. Its uppercase variant `'ẞ'` (U+1E9E)
1756    /// has the same case-folding.
1757    ///
1758    /// As an iterator:
1759    ///
1760    /// ```
1761    /// #![feature(casefold)]
1762    /// assert!('ß'.to_casefold_unnormalized().eq(['s', 's']));
1763    /// assert!('ẞ'.to_casefold_unnormalized().eq(['s', 's']));
1764    /// ```
1765    ///
1766    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1767    ///
1768    /// ```
1769    /// #![feature(casefold)]
1770    /// assert_eq!('ß'.to_casefold_unnormalized().to_string(), "ss");
1771    /// assert_eq!('ẞ'.to_casefold_unnormalized().to_string(), "ss");
1772    /// ```
1773    ///
1774    /// No [normalization] is performed:
1775    ///
1776    /// ```rust
1777    /// #![feature(casefold)]
1778    /// // These two characters are visually and semantically identical;
1779    /// // Unicode considers them to be canonically equivalent.
1780    /// let alpha_tonos = 'ά';
1781    /// let alpha_oxia = 'ά';
1782    ///
1783    /// // However, they are different codepoints:
1784    /// assert_eq!(alpha_tonos, '\u{03AC}');
1785    /// assert_eq!(alpha_oxia, '\u{1F71}');
1786    ///
1787    /// // Their case-foldings are likewise unequal:
1788    /// assert!(alpha_tonos.to_casefold_unnormalized().eq(['\u{03AC}']));
1789    /// assert!(alpha_oxia.to_casefold_unnormalized().eq(['\u{1F71}']));
1790    /// ```
1791    ///
1792    /// # Note on locale
1793    ///
1794    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1795    ///
1796    /// * 'Dotless': I / ı, sometimes written ï
1797    /// * 'Dotted': İ / i
1798    ///
1799    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
1800    ///
1801    /// ```
1802    /// #![feature(casefold)]
1803    /// let casefold_i = 'I'.to_casefold_unnormalized().to_string();
1804    /// ```
1805    ///
1806    /// `'I'`'s correct case folding relies on the language of the text: if we're
1807    /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
1808    /// be `"ı"`. `to_casefold_unnormalized()` does not take this into account, and so:
1809    ///
1810    /// ```
1811    /// #![feature(casefold)]
1812    /// let casefold_i = 'I'.to_casefold_unnormalized().to_string();
1813    ///
1814    /// assert_eq!(casefold_i, "i");
1815    /// ```
1816    ///
1817    /// holds across languages.
1818    ///
1819    /// [normalization]: https://www.unicode.org/faq/normalization.html
1820    #[must_use = "this returns the case-folded character as a new iterator, \
1821                  without modifying the original"]
1822    #[unstable(feature = "casefold", issue = "154742")]
1823    #[inline]
1824    pub fn to_casefold_unnormalized(self) -> ToCasefold {
1825        ToCasefold(CaseMappingIter::new(conversions::to_casefold(self)))
1826    }
1827
1828    /// Checks if the value is within the ASCII range.
1829    ///
1830    /// # Examples
1831    ///
1832    /// ```
1833    /// let ascii = 'a';
1834    /// let non_ascii = '❤';
1835    ///
1836    /// assert!(ascii.is_ascii());
1837    /// assert!(!non_ascii.is_ascii());
1838    /// ```
1839    #[must_use]
1840    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1841    #[rustc_const_stable(feature = "const_char_is_ascii", since = "1.32.0")]
1842    #[rustc_diagnostic_item = "char_is_ascii"]
1843    #[inline]
1844    #[ferrocene::prevalidated]
1845    pub const fn is_ascii(&self) -> bool {
1846        *self as u32 <= 0x7F
1847    }
1848
1849    /// Returns `Some` if the value is within the ASCII range,
1850    /// or `None` if it's not.
1851    ///
1852    /// This is preferred to [`Self::is_ascii`] when you're passing the value
1853    /// along to something else that can take [`ascii::Char`] rather than
1854    /// needing to check again for itself whether the value is in ASCII.
1855    #[must_use]
1856    #[unstable(feature = "ascii_char", issue = "110998")]
1857    #[inline]
1858    #[ferrocene::prevalidated]
1859    pub const fn as_ascii(&self) -> Option<ascii::Char> {
1860        if self.is_ascii() {
1861            // SAFETY: Just checked that this is ASCII.
1862            Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
1863        } else {
1864            None
1865        }
1866    }
1867
1868    /// Converts this char into an [ASCII character](`ascii::Char`), without
1869    /// checking whether it is valid.
1870    ///
1871    /// # Safety
1872    ///
1873    /// This char must be within the ASCII range, or else this is UB.
1874    #[must_use]
1875    #[unstable(feature = "ascii_char", issue = "110998")]
1876    #[inline]
1877    pub const unsafe fn as_ascii_unchecked(&self) -> ascii::Char {
1878        assert_unsafe_precondition!(
1879            check_library_ub,
1880            "as_ascii_unchecked requires that the char is valid ASCII",
1881            (it: &char = self) => it.is_ascii()
1882        );
1883
1884        // SAFETY: the caller promised that this char is ASCII.
1885        unsafe { ascii::Char::from_u8_unchecked(*self as u8) }
1886    }
1887
1888    /// Makes a copy of the value in its ASCII upper case equivalent.
1889    ///
1890    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1891    /// but non-ASCII letters are unchanged.
1892    ///
1893    /// To uppercase the value in-place, use [`make_ascii_uppercase()`].
1894    ///
1895    /// To uppercase ASCII characters in addition to non-ASCII characters, use
1896    /// [`to_uppercase()`].
1897    ///
1898    /// # Examples
1899    ///
1900    /// ```
1901    /// let ascii = 'a';
1902    /// let non_ascii = '❤';
1903    ///
1904    /// assert_eq!('A', ascii.to_ascii_uppercase());
1905    /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
1906    /// ```
1907    ///
1908    /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase
1909    /// [`to_uppercase()`]: #method.to_uppercase
1910    #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1911    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1912    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1913    #[inline]
1914    pub const fn to_ascii_uppercase(&self) -> char {
1915        if self.is_ascii_lowercase() {
1916            (*self as u8).ascii_change_case_unchecked() as char
1917        } else {
1918            *self
1919        }
1920    }
1921
1922    /// Makes a copy of the value in its ASCII lower case equivalent.
1923    ///
1924    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1925    /// but non-ASCII letters are unchanged.
1926    ///
1927    /// To lowercase the value in-place, use [`make_ascii_lowercase()`].
1928    ///
1929    /// To lowercase ASCII characters in addition to non-ASCII characters, use
1930    /// [`to_lowercase()`].
1931    ///
1932    /// # Examples
1933    ///
1934    /// ```
1935    /// let ascii = 'A';
1936    /// let non_ascii = '❤';
1937    ///
1938    /// assert_eq!('a', ascii.to_ascii_lowercase());
1939    /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
1940    /// ```
1941    ///
1942    /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase
1943    /// [`to_lowercase()`]: #method.to_lowercase
1944    #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1945    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1946    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1947    #[inline]
1948    pub const fn to_ascii_lowercase(&self) -> char {
1949        if self.is_ascii_uppercase() {
1950            (*self as u8).ascii_change_case_unchecked() as char
1951        } else {
1952            *self
1953        }
1954    }
1955
1956    /// Checks that two values are an ASCII case-insensitive match.
1957    ///
1958    /// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>.
1959    ///
1960    /// # Examples
1961    ///
1962    /// ```
1963    /// let upper_a = 'A';
1964    /// let lower_a = 'a';
1965    /// let lower_z = 'z';
1966    ///
1967    /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1968    /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1969    /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1970    /// ```
1971    ///
1972    /// [to_ascii_lowercase]: #method.to_ascii_lowercase
1973    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1974    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1975    #[inline]
1976    pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool {
1977        self.to_ascii_lowercase() == other.to_ascii_lowercase()
1978    }
1979
1980    /// Converts this type to its ASCII upper case equivalent in-place.
1981    ///
1982    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1983    /// but non-ASCII letters are unchanged.
1984    ///
1985    /// To return a new uppercased value without modifying the existing one, use
1986    /// [`to_ascii_uppercase()`].
1987    ///
1988    /// # Examples
1989    ///
1990    /// ```
1991    /// let mut ascii = 'a';
1992    ///
1993    /// ascii.make_ascii_uppercase();
1994    ///
1995    /// assert_eq!('A', ascii);
1996    /// ```
1997    ///
1998    /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
1999    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2000    #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
2001    #[inline]
2002    pub const fn make_ascii_uppercase(&mut self) {
2003        *self = self.to_ascii_uppercase();
2004    }
2005
2006    /// Converts this type to its ASCII lower case equivalent in-place.
2007    ///
2008    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
2009    /// but non-ASCII letters are unchanged.
2010    ///
2011    /// To return a new lowercased value without modifying the existing one, use
2012    /// [`to_ascii_lowercase()`].
2013    ///
2014    /// # Examples
2015    ///
2016    /// ```
2017    /// let mut ascii = 'A';
2018    ///
2019    /// ascii.make_ascii_lowercase();
2020    ///
2021    /// assert_eq!('a', ascii);
2022    /// ```
2023    ///
2024    /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
2025    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2026    #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
2027    #[inline]
2028    pub const fn make_ascii_lowercase(&mut self) {
2029        *self = self.to_ascii_lowercase();
2030    }
2031
2032    /// Checks if the value is an ASCII alphabetic character:
2033    ///
2034    /// - U+0041 'A' ..= U+005A 'Z', or
2035    /// - U+0061 'a' ..= U+007A 'z'.
2036    ///
2037    /// # Examples
2038    ///
2039    /// ```
2040    /// let uppercase_a = 'A';
2041    /// let uppercase_g = 'G';
2042    /// let a = 'a';
2043    /// let g = 'g';
2044    /// let zero = '0';
2045    /// let percent = '%';
2046    /// let space = ' ';
2047    /// let lf = '\n';
2048    /// let esc = '\x1b';
2049    ///
2050    /// assert!(uppercase_a.is_ascii_alphabetic());
2051    /// assert!(uppercase_g.is_ascii_alphabetic());
2052    /// assert!(a.is_ascii_alphabetic());
2053    /// assert!(g.is_ascii_alphabetic());
2054    /// assert!(!zero.is_ascii_alphabetic());
2055    /// assert!(!percent.is_ascii_alphabetic());
2056    /// assert!(!space.is_ascii_alphabetic());
2057    /// assert!(!lf.is_ascii_alphabetic());
2058    /// assert!(!esc.is_ascii_alphabetic());
2059    /// ```
2060    #[must_use]
2061    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2062    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2063    #[inline]
2064    pub const fn is_ascii_alphabetic(&self) -> bool {
2065        matches!(*self, 'a'..='z' | 'A'..='Z')
2066    }
2067
2068    /// Checks if the value is an ASCII uppercase character:
2069    /// U+0041 'A' ..= U+005A 'Z'.
2070    ///
2071    /// # Examples
2072    ///
2073    /// ```
2074    /// let uppercase_a = 'A';
2075    /// let uppercase_g = 'G';
2076    /// let a = 'a';
2077    /// let g = 'g';
2078    /// let zero = '0';
2079    /// let percent = '%';
2080    /// let space = ' ';
2081    /// let lf = '\n';
2082    /// let esc = '\x1b';
2083    ///
2084    /// assert!(uppercase_a.is_ascii_uppercase());
2085    /// assert!(uppercase_g.is_ascii_uppercase());
2086    /// assert!(!a.is_ascii_uppercase());
2087    /// assert!(!g.is_ascii_uppercase());
2088    /// assert!(!zero.is_ascii_uppercase());
2089    /// assert!(!percent.is_ascii_uppercase());
2090    /// assert!(!space.is_ascii_uppercase());
2091    /// assert!(!lf.is_ascii_uppercase());
2092    /// assert!(!esc.is_ascii_uppercase());
2093    /// ```
2094    #[must_use]
2095    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2096    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2097    #[inline]
2098    pub const fn is_ascii_uppercase(&self) -> bool {
2099        matches!(*self, 'A'..='Z')
2100    }
2101
2102    /// Checks if the value is an ASCII lowercase character:
2103    /// U+0061 'a' ..= U+007A 'z'.
2104    ///
2105    /// # Examples
2106    ///
2107    /// ```
2108    /// let uppercase_a = 'A';
2109    /// let uppercase_g = 'G';
2110    /// let a = 'a';
2111    /// let g = 'g';
2112    /// let zero = '0';
2113    /// let percent = '%';
2114    /// let space = ' ';
2115    /// let lf = '\n';
2116    /// let esc = '\x1b';
2117    ///
2118    /// assert!(!uppercase_a.is_ascii_lowercase());
2119    /// assert!(!uppercase_g.is_ascii_lowercase());
2120    /// assert!(a.is_ascii_lowercase());
2121    /// assert!(g.is_ascii_lowercase());
2122    /// assert!(!zero.is_ascii_lowercase());
2123    /// assert!(!percent.is_ascii_lowercase());
2124    /// assert!(!space.is_ascii_lowercase());
2125    /// assert!(!lf.is_ascii_lowercase());
2126    /// assert!(!esc.is_ascii_lowercase());
2127    /// ```
2128    #[must_use]
2129    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2130    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2131    #[inline]
2132    pub const fn is_ascii_lowercase(&self) -> bool {
2133        matches!(*self, 'a'..='z')
2134    }
2135
2136    /// Checks if the value is an ASCII alphanumeric character:
2137    ///
2138    /// - U+0041 'A' ..= U+005A 'Z', or
2139    /// - U+0061 'a' ..= U+007A 'z', or
2140    /// - U+0030 '0' ..= U+0039 '9'.
2141    ///
2142    /// # Examples
2143    ///
2144    /// ```
2145    /// let uppercase_a = 'A';
2146    /// let uppercase_g = 'G';
2147    /// let a = 'a';
2148    /// let g = 'g';
2149    /// let zero = '0';
2150    /// let percent = '%';
2151    /// let space = ' ';
2152    /// let lf = '\n';
2153    /// let esc = '\x1b';
2154    ///
2155    /// assert!(uppercase_a.is_ascii_alphanumeric());
2156    /// assert!(uppercase_g.is_ascii_alphanumeric());
2157    /// assert!(a.is_ascii_alphanumeric());
2158    /// assert!(g.is_ascii_alphanumeric());
2159    /// assert!(zero.is_ascii_alphanumeric());
2160    /// assert!(!percent.is_ascii_alphanumeric());
2161    /// assert!(!space.is_ascii_alphanumeric());
2162    /// assert!(!lf.is_ascii_alphanumeric());
2163    /// assert!(!esc.is_ascii_alphanumeric());
2164    /// ```
2165    #[must_use]
2166    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2167    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2168    #[inline]
2169    pub const fn is_ascii_alphanumeric(&self) -> bool {
2170        matches!(*self, '0'..='9') | matches!(*self, 'A'..='Z') | matches!(*self, 'a'..='z')
2171    }
2172
2173    /// Checks if the value is an ASCII decimal digit:
2174    /// U+0030 '0' ..= U+0039 '9'.
2175    ///
2176    /// # Examples
2177    ///
2178    /// ```
2179    /// let uppercase_a = 'A';
2180    /// let uppercase_g = 'G';
2181    /// let a = 'a';
2182    /// let g = 'g';
2183    /// let zero = '0';
2184    /// let percent = '%';
2185    /// let space = ' ';
2186    /// let lf = '\n';
2187    /// let esc = '\x1b';
2188    ///
2189    /// assert!(!uppercase_a.is_ascii_digit());
2190    /// assert!(!uppercase_g.is_ascii_digit());
2191    /// assert!(!a.is_ascii_digit());
2192    /// assert!(!g.is_ascii_digit());
2193    /// assert!(zero.is_ascii_digit());
2194    /// assert!(!percent.is_ascii_digit());
2195    /// assert!(!space.is_ascii_digit());
2196    /// assert!(!lf.is_ascii_digit());
2197    /// assert!(!esc.is_ascii_digit());
2198    /// ```
2199    #[must_use]
2200    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2201    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2202    #[inline]
2203    pub const fn is_ascii_digit(&self) -> bool {
2204        matches!(*self, '0'..='9')
2205    }
2206
2207    /// Checks if the value is an ASCII octal digit:
2208    /// U+0030 '0' ..= U+0037 '7'.
2209    ///
2210    /// # Examples
2211    ///
2212    /// ```
2213    /// #![feature(is_ascii_octdigit)]
2214    ///
2215    /// let uppercase_a = 'A';
2216    /// let a = 'a';
2217    /// let zero = '0';
2218    /// let seven = '7';
2219    /// let nine = '9';
2220    /// let percent = '%';
2221    /// let lf = '\n';
2222    ///
2223    /// assert!(!uppercase_a.is_ascii_octdigit());
2224    /// assert!(!a.is_ascii_octdigit());
2225    /// assert!(zero.is_ascii_octdigit());
2226    /// assert!(seven.is_ascii_octdigit());
2227    /// assert!(!nine.is_ascii_octdigit());
2228    /// assert!(!percent.is_ascii_octdigit());
2229    /// assert!(!lf.is_ascii_octdigit());
2230    /// ```
2231    #[must_use]
2232    #[unstable(feature = "is_ascii_octdigit", issue = "101288")]
2233    #[inline]
2234    pub const fn is_ascii_octdigit(&self) -> bool {
2235        matches!(*self, '0'..='7')
2236    }
2237
2238    /// Checks if the value is an ASCII hexadecimal digit:
2239    ///
2240    /// - U+0030 '0' ..= U+0039 '9', or
2241    /// - U+0041 'A' ..= U+0046 'F', or
2242    /// - U+0061 'a' ..= U+0066 'f'.
2243    ///
2244    /// # Examples
2245    ///
2246    /// ```
2247    /// let uppercase_a = 'A';
2248    /// let uppercase_g = 'G';
2249    /// let a = 'a';
2250    /// let g = 'g';
2251    /// let zero = '0';
2252    /// let percent = '%';
2253    /// let space = ' ';
2254    /// let lf = '\n';
2255    /// let esc = '\x1b';
2256    ///
2257    /// assert!(uppercase_a.is_ascii_hexdigit());
2258    /// assert!(!uppercase_g.is_ascii_hexdigit());
2259    /// assert!(a.is_ascii_hexdigit());
2260    /// assert!(!g.is_ascii_hexdigit());
2261    /// assert!(zero.is_ascii_hexdigit());
2262    /// assert!(!percent.is_ascii_hexdigit());
2263    /// assert!(!space.is_ascii_hexdigit());
2264    /// assert!(!lf.is_ascii_hexdigit());
2265    /// assert!(!esc.is_ascii_hexdigit());
2266    /// ```
2267    #[must_use]
2268    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2269    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2270    #[inline]
2271    pub const fn is_ascii_hexdigit(&self) -> bool {
2272        matches!(*self, '0'..='9') | matches!(*self, 'A'..='F') | matches!(*self, 'a'..='f')
2273    }
2274
2275    /// Checks if the value is an ASCII punctuation or symbol character
2276    /// (i.e. not alphanumeric, whitespace, or control):
2277    ///
2278    /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or
2279    /// - U+003A ..= U+0040 `: ; < = > ? @`, or
2280    /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or
2281    /// - U+007B ..= U+007E `{ | } ~`
2282    ///
2283    /// # Examples
2284    ///
2285    /// ```
2286    /// let uppercase_a = 'A';
2287    /// let uppercase_g = 'G';
2288    /// let a = 'a';
2289    /// let g = 'g';
2290    /// let zero = '0';
2291    /// let percent = '%';
2292    /// let space = ' ';
2293    /// let lf = '\n';
2294    /// let esc = '\x1b';
2295    ///
2296    /// assert!(!uppercase_a.is_ascii_punctuation());
2297    /// assert!(!uppercase_g.is_ascii_punctuation());
2298    /// assert!(!a.is_ascii_punctuation());
2299    /// assert!(!g.is_ascii_punctuation());
2300    /// assert!(!zero.is_ascii_punctuation());
2301    /// assert!(percent.is_ascii_punctuation());
2302    /// assert!(!space.is_ascii_punctuation());
2303    /// assert!(!lf.is_ascii_punctuation());
2304    /// assert!(!esc.is_ascii_punctuation());
2305    /// ```
2306    #[must_use]
2307    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2308    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2309    #[inline]
2310    pub const fn is_ascii_punctuation(&self) -> bool {
2311        matches!(*self, '!'..='/')
2312            | matches!(*self, ':'..='@')
2313            | matches!(*self, '['..='`')
2314            | matches!(*self, '{'..='~')
2315    }
2316
2317    /// Checks if the value is an ASCII graphic character
2318    /// (i.e. not whitespace or control):
2319    /// U+0021 '!' ..= U+007E '~'.
2320    ///
2321    /// # Examples
2322    ///
2323    /// ```
2324    /// let uppercase_a = 'A';
2325    /// let uppercase_g = 'G';
2326    /// let a = 'a';
2327    /// let g = 'g';
2328    /// let zero = '0';
2329    /// let percent = '%';
2330    /// let space = ' ';
2331    /// let lf = '\n';
2332    /// let esc = '\x1b';
2333    ///
2334    /// assert!(uppercase_a.is_ascii_graphic());
2335    /// assert!(uppercase_g.is_ascii_graphic());
2336    /// assert!(a.is_ascii_graphic());
2337    /// assert!(g.is_ascii_graphic());
2338    /// assert!(zero.is_ascii_graphic());
2339    /// assert!(percent.is_ascii_graphic());
2340    /// assert!(!space.is_ascii_graphic());
2341    /// assert!(!lf.is_ascii_graphic());
2342    /// assert!(!esc.is_ascii_graphic());
2343    /// ```
2344    #[must_use]
2345    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2346    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2347    #[inline]
2348    pub const fn is_ascii_graphic(&self) -> bool {
2349        matches!(*self, '!'..='~')
2350    }
2351
2352    /// Checks if the value is an ASCII whitespace character:
2353    /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
2354    /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
2355    ///
2356    /// **Warning:** Because the list above excludes U+000B VERTICAL TAB,
2357    /// `c.is_ascii_whitespace()` is **not** equivalent to `c.is_ascii() && c.is_whitespace()`.
2358    ///
2359    /// Rust uses the WhatWG Infra Standard's [definition of ASCII
2360    /// whitespace][infra-aw]. There are several other definitions in
2361    /// wide use. For instance, [the POSIX locale][pct] includes
2362    /// U+000B VERTICAL TAB as well as all the above characters,
2363    /// but—from the very same specification—[the default rule for
2364    /// "field splitting" in the Bourne shell][bfs] considers *only*
2365    /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
2366    ///
2367    /// If you are writing a program that will process an existing
2368    /// file format, check what that format's definition of whitespace is
2369    /// before using this function.
2370    ///
2371    /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
2372    /// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
2373    /// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
2374    ///
2375    /// # Examples
2376    ///
2377    /// ```
2378    /// let uppercase_a = 'A';
2379    /// let uppercase_g = 'G';
2380    /// let a = 'a';
2381    /// let g = 'g';
2382    /// let zero = '0';
2383    /// let percent = '%';
2384    /// let space = ' ';
2385    /// let lf = '\n';
2386    /// let esc = '\x1b';
2387    ///
2388    /// assert!(!uppercase_a.is_ascii_whitespace());
2389    /// assert!(!uppercase_g.is_ascii_whitespace());
2390    /// assert!(!a.is_ascii_whitespace());
2391    /// assert!(!g.is_ascii_whitespace());
2392    /// assert!(!zero.is_ascii_whitespace());
2393    /// assert!(!percent.is_ascii_whitespace());
2394    /// assert!(space.is_ascii_whitespace());
2395    /// assert!(lf.is_ascii_whitespace());
2396    /// assert!(!esc.is_ascii_whitespace());
2397    /// ```
2398    #[must_use]
2399    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2400    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2401    #[inline]
2402    #[ferrocene::prevalidated]
2403    pub const fn is_ascii_whitespace(&self) -> bool {
2404        matches!(*self, '\t' | '\n' | '\x0C' | '\r' | ' ')
2405    }
2406
2407    /// Checks if the value is an ASCII control character:
2408    /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE.
2409    /// Note that most ASCII whitespace characters are control
2410    /// characters, but SPACE is not.
2411    ///
2412    /// # Examples
2413    ///
2414    /// ```
2415    /// let uppercase_a = 'A';
2416    /// let uppercase_g = 'G';
2417    /// let a = 'a';
2418    /// let g = 'g';
2419    /// let zero = '0';
2420    /// let percent = '%';
2421    /// let space = ' ';
2422    /// let lf = '\n';
2423    /// let esc = '\x1b';
2424    ///
2425    /// assert!(!uppercase_a.is_ascii_control());
2426    /// assert!(!uppercase_g.is_ascii_control());
2427    /// assert!(!a.is_ascii_control());
2428    /// assert!(!g.is_ascii_control());
2429    /// assert!(!zero.is_ascii_control());
2430    /// assert!(!percent.is_ascii_control());
2431    /// assert!(!space.is_ascii_control());
2432    /// assert!(lf.is_ascii_control());
2433    /// assert!(esc.is_ascii_control());
2434    /// ```
2435    #[must_use]
2436    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2437    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2438    #[inline]
2439    pub const fn is_ascii_control(&self) -> bool {
2440        matches!(*self, '\0'..='\x1F' | '\x7F')
2441    }
2442}
2443
/// Bundle of boolean switches, each enabling one optional kind of escaping.
///
/// NOTE(review): the code that consumes these flags is not part of this
/// excerpt; going by the name it is presumably the extended debug-escape
/// routine for `char` — confirm against the caller.
#[ferrocene::prevalidated]
pub(crate) struct EscapeDebugExtArgs {
    /// Escape Grapheme Extender codepoints?
    ///
    /// `true` requests escaping, `false` leaves such codepoints as they are.
    pub(crate) escape_grapheme_extender: bool,

    /// Escape single quotes?
    ///
    /// `true` requests escaping of `'`.
    pub(crate) escape_single_quote: bool,

    /// Escape double quotes?
    ///
    /// `true` requests escaping of `"`.
    pub(crate) escape_double_quote: bool,
}
2455
impl EscapeDebugExtArgs {
    /// Preset with every switch turned on: grapheme extenders, single quotes
    /// and double quotes are all requested to be escaped.
    pub(crate) const ESCAPE_ALL: Self = Self {
        escape_grapheme_extender: true,
        escape_single_quote: true,
        escape_double_quote: true,
    };
}
2463
2464#[inline]
2465#[must_use]
2466#[ferrocene::prevalidated]
2467const fn len_utf8(code: u32) -> usize {
2468    match code {
2469        ..MAX_ONE_B => 1,
2470        ..MAX_TWO_B => 2,
2471        ..MAX_THREE_B => 3,
2472        _ => 4,
2473    }
2474}
2475
/// Returns how many `u16` units the UTF-16 encoding of `code` occupies:
/// one when the value fits in 16 bits, two otherwise.
///
/// No validity check is performed on `code`.
#[inline]
#[must_use]
const fn len_utf16(code: u32) -> usize {
    match code {
        // Fits in a single 16-bit unit.
        0..=0xFFFF => 1,
        // Anything larger is split into a pair of units.
        _ => 2,
    }
}
2481
2482/// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,
2483/// and then returns the subslice of the buffer that contains the encoded character.
2484///
2485/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
2486/// (Creating a `char` in the surrogate range is UB.)
2487/// The result is valid [generalized UTF-8] but not valid UTF-8.
2488///
2489/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
2490///
2491/// # Panics
2492///
2493/// Panics if the buffer is not large enough.
2494/// A buffer of length four is large enough to encode any `char`.
2495#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2496#[doc(hidden)]
2497#[inline]
2498#[ferrocene::prevalidated]
2499pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
2500    let len = len_utf8(code);
2501    if dst.len() < len {
2502        const_panic!(
2503            "encode_utf8: buffer does not have enough bytes to encode code point",
2504            "encode_utf8: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
2505            code: u32 = code,
2506            len: usize = len,
2507            dst_len: usize = dst.len(),
2508        );
2509    }
2510
2511    // SAFETY: `dst` is checked to be at least the length needed to encode the codepoint.
2512    unsafe { encode_utf8_raw_unchecked(code, dst.as_mut_ptr()) };
2513
2514    // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
2515    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
2516}
2517
2518/// Encodes a raw `u32` value as UTF-8 into the byte buffer pointed to by `dst`.
2519///
2520/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
2521/// (Creating a `char` in the surrogate range is UB.)
2522/// The result is valid [generalized UTF-8] but not valid UTF-8.
2523///
2524/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
2525///
2526/// # Safety
2527///
2528/// The behavior is undefined if the buffer pointed to by `dst` is not
2529/// large enough to hold the encoded codepoint. A buffer of length four
2530/// is large enough to encode any `char`.
2531///
2532/// For a safe version of this function, see the [`encode_utf8_raw`] function.
2533#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2534#[doc(hidden)]
2535#[inline]
2536#[ferrocene::prevalidated]
2537pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) {
2538    let len = len_utf8(code);
2539    // SAFETY: The caller must guarantee that the buffer pointed to by `dst`
2540    // is at least `len` bytes long.
2541    unsafe {
2542        if len == 1 {
2543            *dst = code as u8;
2544            return;
2545        }
2546
2547        let last1 = (code >> 0 & 0x3F) as u8 | TAG_CONT;
2548        let last2 = (code >> 6 & 0x3F) as u8 | TAG_CONT;
2549        let last3 = (code >> 12 & 0x3F) as u8 | TAG_CONT;
2550        let last4 = (code >> 18 & 0x3F) as u8 | TAG_FOUR_B;
2551
2552        if len == 2 {
2553            *dst = last2 | TAG_TWO_B;
2554            *dst.add(1) = last1;
2555            return;
2556        }
2557
2558        if len == 3 {
2559            *dst = last3 | TAG_THREE_B;
2560            *dst.add(1) = last2;
2561            *dst.add(2) = last1;
2562            return;
2563        }
2564
2565        *dst = last4;
2566        *dst.add(1) = last3;
2567        *dst.add(2) = last2;
2568        *dst.add(3) = last1;
2569    }
2570}
2571
2572/// Encodes a raw `u32` value as native endian UTF-16 into the provided `u16` buffer,
2573/// and then returns the subslice of the buffer that contains the encoded character.
2574///
2575/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
2576/// (Creating a `char` in the surrogate range is UB.)
2577///
2578/// # Panics
2579///
2580/// Panics if the buffer is not large enough.
2581/// A buffer of length 2 is large enough to encode any `char`.
2582#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2583#[doc(hidden)]
2584#[inline]
2585pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
2586    let len = len_utf16(code);
2587    match (len, &mut *dst) {
2588        (1, [a, ..]) => {
2589            *a = code as u16;
2590        }
2591        (2, [a, b, ..]) => {
2592            code -= 0x1_0000;
2593            *a = (code >> 10) as u16 | 0xD800;
2594            *b = (code & 0x3FF) as u16 | 0xDC00;
2595        }
2596        _ => {
2597            const_panic!(
2598                "encode_utf16: buffer does not have enough bytes to encode code point",
2599                "encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
2600                code: u32 = code,
2601                len: usize = len,
2602                dst_len: usize = dst.len(),
2603            )
2604        }
2605    };
2606    // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
2607    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
2608}