Skip to main content

core/char/
methods.rs

1//! impl char {}
2
3use super::*;
4use crate::panic::const_panic;
5use crate::slice;
6use crate::str::from_utf8_unchecked_mut;
7use crate::ub_checks::assert_unsafe_precondition;
8use crate::unicode::{self, conversions};
9
10impl char {
11    /// The lowest valid code point a `char` can have, `'\0'`.
12    ///
13    /// Unlike integer types, `char` actually has a gap in the middle,
14    /// meaning that the range of possible `char`s is smaller than you
15    /// might expect. Ranges of `char` will automatically hop this gap
16    /// for you:
17    ///
18    /// ```
19    /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
20    /// let size = (char::MIN..=char::MAX).count() as u32;
21    /// assert!(size < dist);
22    /// ```
23    ///
24    /// Despite this gap, the `MIN` and [`MAX`] values can be used as bounds for
25    /// all `char` values.
26    ///
27    /// [`MAX`]: char::MAX
28    ///
29    /// # Examples
30    ///
31    /// ```
32    /// # fn something_which_returns_char() -> char { 'a' }
33    /// let c: char = something_which_returns_char();
34    /// assert!(char::MIN <= c);
35    ///
36    /// let value_at_min = u32::from(char::MIN);
37    /// assert_eq!(char::from_u32(value_at_min), Some('\0'));
38    /// ```
39    #[stable(feature = "char_min", since = "1.83.0")]
40    pub const MIN: char = '\0';
41
42    /// The highest valid code point a `char` can have, `'\u{10FFFF}'`.
43    ///
44    /// Unlike integer types, `char` actually has a gap in the middle,
45    /// meaning that the range of possible `char`s is smaller than you
46    /// might expect. Ranges of `char` will automatically hop this gap
47    /// for you:
48    ///
49    /// ```
50    /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
51    /// let size = (char::MIN..=char::MAX).count() as u32;
52    /// assert!(size < dist);
53    /// ```
54    ///
55    /// Despite this gap, the [`MIN`] and `MAX` values can be used as bounds for
56    /// all `char` values.
57    ///
58    /// [`MIN`]: char::MIN
59    ///
60    /// # Examples
61    ///
62    /// ```
63    /// # fn something_which_returns_char() -> char { 'a' }
64    /// let c: char = something_which_returns_char();
65    /// assert!(c <= char::MAX);
66    ///
67    /// let value_at_max = u32::from(char::MAX);
68    /// assert_eq!(char::from_u32(value_at_max), Some('\u{10FFFF}'));
69    /// assert_eq!(char::from_u32(value_at_max + 1), None);
70    /// ```
71    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
72    pub const MAX: char = '\u{10FFFF}';
73
74    /// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
75    /// UTF-8 encoding.
76    #[stable(feature = "char_max_len_assoc", since = "1.93.0")]
77    pub const MAX_LEN_UTF8: usize = 4;
78
79    /// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
80    /// to UTF-16 encoding.
81    #[stable(feature = "char_max_len_assoc", since = "1.93.0")]
82    pub const MAX_LEN_UTF16: usize = 2;
83
84    /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
85    /// decoding error.
86    ///
87    /// It can occur, for example, when giving ill-formed UTF-8 bytes to
88    /// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy).
89    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
90    pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
91
92    /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
93    /// `char` and `str` methods are based on.
94    ///
95    /// New versions of Unicode are released regularly, and subsequently all methods
96    /// in the standard library depending on Unicode are updated. Therefore, the
97    /// behavior of some `char` and `str` methods, and the value of this constant,
98    /// change over time (within the boundaries of Unicode's [stability policies]).
99    /// This is *not* considered to be a breaking change.
100    ///
101    /// [stability policies]: https://www.unicode.org/policies/stability_policy.html
102    ///
103    /// The version numbering scheme is explained in
104    /// [Section 3.1 (Version Numbering)] of the Unicode Standard.
105    ///
106    /// [Section 3.1 (Version Numbering)]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49512
107    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
108    pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
109
110    /// Creates an iterator over the native endian UTF-16 encoded code points in `iter`,
111    /// returning unpaired surrogates as `Err`s.
112    ///
113    /// # Examples
114    ///
115    /// Basic usage:
116    ///
117    /// ```
118    /// // 𝄞mus<invalid>ic<invalid>
119    /// let v = [
120    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
121    /// ];
122    ///
123    /// assert_eq!(
124    ///     char::decode_utf16(v)
125    ///         .map(|r| r.map_err(|e| e.unpaired_surrogate()))
126    ///         .collect::<Vec<_>>(),
127    ///     vec![
128    ///         Ok('𝄞'),
129    ///         Ok('m'), Ok('u'), Ok('s'),
130    ///         Err(0xDD1E),
131    ///         Ok('i'), Ok('c'),
132    ///         Err(0xD834)
133    ///     ]
134    /// );
135    /// ```
136    ///
137    /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
138    ///
139    /// ```
140    /// // 𝄞mus<invalid>ic<invalid>
141    /// let v = [
142    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
143    /// ];
144    ///
145    /// assert_eq!(
146    ///     char::decode_utf16(v)
147    ///        .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER))
148    ///        .collect::<String>(),
149    ///     "𝄞mus�ic�"
150    /// );
151    /// ```
152    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
153    #[inline]
154    #[ferrocene::prevalidated]
155    pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
156        super::decode::decode_utf16(iter)
157    }
158
159    /// Converts a `u32` to a `char`.
160    ///
161    /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
162    /// [`as`](../std/keyword.as.html):
163    ///
164    /// ```
165    /// let c = '💯';
166    /// let i = c as u32;
167    ///
168    /// assert_eq!(128175, i);
169    /// ```
170    ///
171    /// However, the reverse is not true: not all valid [`u32`]s are valid
172    /// `char`s. `from_u32()` will return `None` if the input is not a valid value
173    /// for a `char`.
174    ///
175    /// For an unsafe version of this function which ignores these checks, see
176    /// [`from_u32_unchecked`].
177    ///
178    /// [`from_u32_unchecked`]: #method.from_u32_unchecked
179    ///
180    /// # Examples
181    ///
182    /// Basic usage:
183    ///
184    /// ```
185    /// let c = char::from_u32(0x2764);
186    ///
187    /// assert_eq!(Some('❤'), c);
188    /// ```
189    ///
190    /// Returning `None` when the input is not a valid `char`:
191    ///
192    /// ```
193    /// let c = char::from_u32(0x110000);
194    ///
195    /// assert_eq!(None, c);
196    /// ```
197    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
198    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
199    #[must_use]
200    #[inline]
201    pub const fn from_u32(i: u32) -> Option<char> {
202        super::convert::from_u32(i)
203    }
204
205    /// Converts a `u32` to a `char`, ignoring validity.
206    ///
207    /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
208    /// `as`:
209    ///
210    /// ```
211    /// let c = '💯';
212    /// let i = c as u32;
213    ///
214    /// assert_eq!(128175, i);
215    /// ```
216    ///
217    /// However, the reverse is not true: not all valid [`u32`]s are valid
218    /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to
219    /// `char`, possibly creating an invalid one.
220    ///
221    /// # Safety
222    ///
223    /// This function is unsafe, as it may construct invalid `char` values.
224    ///
225    /// For a safe version of this function, see the [`from_u32`] function.
226    ///
227    /// [`from_u32`]: #method.from_u32
228    ///
229    /// # Examples
230    ///
231    /// Basic usage:
232    ///
233    /// ```
234    /// let c = unsafe { char::from_u32_unchecked(0x2764) };
235    ///
236    /// assert_eq!('❤', c);
237    /// ```
238    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
239    #[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
240    #[must_use]
241    #[inline]
242    #[ferrocene::prevalidated]
243    pub const unsafe fn from_u32_unchecked(i: u32) -> char {
244        // SAFETY: the safety contract must be upheld by the caller.
245        unsafe { super::convert::from_u32_unchecked(i) }
246    }
247
248    /// Converts a digit in the given radix to a `char`.
249    ///
250    /// A 'radix' here is sometimes also called a 'base'. A radix of two
251    /// indicates a binary number, a radix of ten, decimal, and a radix of
252    /// sixteen, hexadecimal, to give some common values. Arbitrary
253    /// radices are supported.
254    ///
255    /// `from_digit()` will return `None` if the input is not a digit in
256    /// the given radix.
257    ///
258    /// # Panics
259    ///
260    /// Panics if given a radix larger than 36.
261    ///
262    /// # Examples
263    ///
264    /// Basic usage:
265    ///
266    /// ```
267    /// let c = char::from_digit(4, 10);
268    ///
269    /// assert_eq!(Some('4'), c);
270    ///
271    /// // Decimal 11 is a single digit in base 16
272    /// let c = char::from_digit(11, 16);
273    ///
274    /// assert_eq!(Some('b'), c);
275    /// ```
276    ///
277    /// Returning `None` when the input is not a digit:
278    ///
279    /// ```
280    /// let c = char::from_digit(20, 10);
281    ///
282    /// assert_eq!(None, c);
283    /// ```
284    ///
285    /// Passing a large radix, causing a panic:
286    ///
287    /// ```should_panic
288    /// // this panics
289    /// let _c = char::from_digit(1, 37);
290    /// ```
291    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
292    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
293    #[must_use]
294    #[inline]
295    pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
296        super::convert::from_digit(num, radix)
297    }
298
299    /// Checks if a `char` is a digit in the given radix.
300    ///
301    /// A 'radix' here is sometimes also called a 'base'. A radix of two
302    /// indicates a binary number, a radix of ten, decimal, and a radix of
303    /// sixteen, hexadecimal, to give some common values. Arbitrary
304    /// radices are supported.
305    ///
306    /// Compared to [`is_numeric()`], this function only recognizes the characters
307    /// `0-9`, `a-z` and `A-Z`.
308    ///
309    /// 'Digit' is defined to be only the following characters:
310    ///
311    /// * `0-9`
312    /// * `a-z`
313    /// * `A-Z`
314    ///
315    /// For a more comprehensive understanding of 'digit', see [`is_numeric()`].
316    ///
317    /// [`is_numeric()`]: #method.is_numeric
318    ///
319    /// # Panics
320    ///
321    /// Panics if given a radix smaller than 2 or larger than 36.
322    ///
323    /// # Examples
324    ///
325    /// Basic usage:
326    ///
327    /// ```
328    /// assert!('1'.is_digit(10));
329    /// assert!('f'.is_digit(16));
330    /// assert!(!'f'.is_digit(10));
331    /// ```
332    ///
333    /// Passing a large radix, causing a panic:
334    ///
335    /// ```should_panic
336    /// // this panics
337    /// '1'.is_digit(37);
338    /// ```
339    ///
340    /// Passing a small radix, causing a panic:
341    ///
342    /// ```should_panic
343    /// // this panics
344    /// '1'.is_digit(1);
345    /// ```
346    #[stable(feature = "rust1", since = "1.0.0")]
347    #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
348    #[inline]
349    pub const fn is_digit(self, radix: u32) -> bool {
350        self.to_digit(radix).is_some()
351    }
352
353    /// Converts a `char` to a digit in the given radix.
354    ///
355    /// A 'radix' here is sometimes also called a 'base'. A radix of two
356    /// indicates a binary number, a radix of ten, decimal, and a radix of
357    /// sixteen, hexadecimal, to give some common values. Arbitrary
358    /// radices are supported.
359    ///
360    /// 'Digit' is defined to be only the following characters:
361    ///
362    /// * `0-9`
363    /// * `a-z`
364    /// * `A-Z`
365    ///
366    /// # Errors
367    ///
368    /// Returns `None` if the `char` does not refer to a digit in the given radix.
369    ///
370    /// # Panics
371    ///
372    /// Panics if given a radix smaller than 2 or larger than 36.
373    ///
374    /// # Examples
375    ///
376    /// Basic usage:
377    ///
378    /// ```
379    /// assert_eq!('1'.to_digit(10), Some(1));
380    /// assert_eq!('f'.to_digit(16), Some(15));
381    /// ```
382    ///
383    /// Passing a non-digit results in failure:
384    ///
385    /// ```
386    /// assert_eq!('f'.to_digit(10), None);
387    /// assert_eq!('z'.to_digit(16), None);
388    /// ```
389    ///
390    /// Passing a large radix, causing a panic:
391    ///
392    /// ```should_panic
393    /// // this panics
394    /// let _ = '1'.to_digit(37);
395    /// ```
    ///
    /// Passing a small radix, causing a panic:
397    ///
398    /// ```should_panic
399    /// // this panics
400    /// let _ = '1'.to_digit(1);
401    /// ```
402    #[stable(feature = "rust1", since = "1.0.0")]
403    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
404    #[rustc_diagnostic_item = "char_to_digit"]
405    #[must_use = "this returns the result of the operation, \
406                  without modifying the original"]
407    #[inline]
408    #[ferrocene::prevalidated]
409    pub const fn to_digit(self, radix: u32) -> Option<u32> {
410        assert!(
411            radix >= 2 && radix <= 36,
412            "to_digit: invalid radix -- radix must be in the range 2 to 36 inclusive"
413        );
414        // check radix to remove letter handling code when radix is a known constant
415        let value = if self > '9' && radix > 10 {
416            // mask to convert ASCII letters to uppercase
417            const TO_UPPERCASE_MASK: u32 = !0b0010_0000;
418            // Converts an ASCII letter to its corresponding integer value:
419            // A-Z => 10-35, a-z => 10-35. Other characters produce values >= 36.
420            //
421            // Add Overflow Safety:
422            // By applying the mask after the subtraction, the first addendum is
423            // constrained such that it never exceeds u32::MAX - 0x20.
424            ((self as u32).wrapping_sub('A' as u32) & TO_UPPERCASE_MASK) + 10
425        } else {
426            // convert digit to value, non-digits wrap to values > 36
427            (self as u32).wrapping_sub('0' as u32)
428        };
429        // FIXME(const-hack): once then_some is const fn, use it here
430        if value < radix { Some(value) } else { None }
431    }
432
433    /// Returns an iterator that yields the hexadecimal Unicode escape of a
434    /// character as `char`s.
435    ///
436    /// This will escape characters with the Rust syntax of the form
437    /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
438    ///
439    /// # Examples
440    ///
441    /// As an iterator:
442    ///
443    /// ```
444    /// for c in '❤'.escape_unicode() {
445    ///     print!("{c}");
446    /// }
447    /// println!();
448    /// ```
449    ///
450    /// Using `println!` directly:
451    ///
452    /// ```
453    /// println!("{}", '❤'.escape_unicode());
454    /// ```
455    ///
456    /// Both are equivalent to:
457    ///
458    /// ```
459    /// println!("\\u{{2764}}");
460    /// ```
461    ///
462    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
463    ///
464    /// ```
465    /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
466    /// ```
467    #[must_use = "this returns the escaped char as an iterator, \
468                  without modifying the original"]
469    #[stable(feature = "rust1", since = "1.0.0")]
470    #[inline]
471    #[ferrocene::prevalidated]
472    pub fn escape_unicode(self) -> EscapeUnicode {
473        EscapeUnicode::new(self)
474    }
475
476    /// An extended version of `escape_debug` that optionally permits escaping
477    /// Extended Grapheme codepoints, single quotes, and double quotes. This
478    /// allows us to format characters like nonspacing marks better when they're
479    /// at the start of a string, and allows escaping single quotes in
480    /// characters, and double quotes in strings.
481    #[inline]
482    #[ferrocene::prevalidated]
483    pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
484        match self {
485            // Special escapes
486            '\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
487            '\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
488            '\\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus),
489            '\n' => EscapeDebug::backslash(ascii::Char::SmallN),
490            '\t' => EscapeDebug::backslash(ascii::Char::SmallT),
491            '\r' => EscapeDebug::backslash(ascii::Char::SmallR),
492            '\0' => EscapeDebug::backslash(ascii::Char::Digit0),
493
494            // ASCII fast path
495            '\x20'..='\x7E' => EscapeDebug::printable(self),
496
497            _ if self.is_control()
498                || self.is_private_use()
499                || self.is_whitespace()
500                || args.escape_grapheme_extender && self.is_grapheme_extender()
501                || self.is_default_ignorable()
502                || self.is_format_control()
503                || !self.is_assigned() =>
504            {
505                EscapeDebug::unicode(self)
506            }
507
508            _ => EscapeDebug::printable(self),
509        }
510    }
511
512    /// Returns an iterator that yields the literal escape code of a character
513    /// as `char`s.
514    ///
515    /// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations
516    /// of `str` or `char`.
517    ///
518    /// # Examples
519    ///
520    /// As an iterator:
521    ///
522    /// ```
523    /// for c in '\n'.escape_debug() {
524    ///     print!("{c}");
525    /// }
526    /// println!();
527    /// ```
528    ///
529    /// Using `println!` directly:
530    ///
531    /// ```
532    /// println!("{}", '\n'.escape_debug());
533    /// ```
534    ///
535    /// Both are equivalent to:
536    ///
537    /// ```
538    /// println!("\\n");
539    /// ```
540    ///
541    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
542    ///
543    /// ```
544    /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
545    /// ```
546    #[must_use = "this returns the escaped char as an iterator, \
547                  without modifying the original"]
548    #[stable(feature = "char_escape_debug", since = "1.20.0")]
549    #[inline]
550    #[ferrocene::prevalidated]
551    pub fn escape_debug(self) -> EscapeDebug {
552        self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
553    }
554
555    /// Returns an iterator that yields the literal escape code of a character
556    /// as `char`s.
557    ///
558    /// The default is chosen with a bias toward producing literals that are
559    /// legal in a variety of languages, including C++11 and similar C-family
560    /// languages. The exact rules are:
561    ///
562    /// * Tab is escaped as `\t`.
563    /// * Carriage return is escaped as `\r`.
564    /// * Line feed is escaped as `\n`.
565    /// * Single quote is escaped as `\'`.
566    /// * Double quote is escaped as `\"`.
567    /// * Backslash is escaped as `\\`.
568    /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
569    ///   inclusive is not escaped.
570    /// * All other characters are given hexadecimal Unicode escapes; see
571    ///   [`escape_unicode`].
572    ///
573    /// [`escape_unicode`]: #method.escape_unicode
574    ///
575    /// # Examples
576    ///
577    /// As an iterator:
578    ///
579    /// ```
580    /// for c in '"'.escape_default() {
581    ///     print!("{c}");
582    /// }
583    /// println!();
584    /// ```
585    ///
586    /// Using `println!` directly:
587    ///
588    /// ```
589    /// println!("{}", '"'.escape_default());
590    /// ```
591    ///
592    /// Both are equivalent to:
593    ///
594    /// ```
595    /// println!("\\\"");
596    /// ```
597    ///
598    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
599    ///
600    /// ```
601    /// assert_eq!('"'.escape_default().to_string(), "\\\"");
602    /// ```
603    #[must_use = "this returns the escaped char as an iterator, \
604                  without modifying the original"]
605    #[stable(feature = "rust1", since = "1.0.0")]
606    #[inline]
607    #[ferrocene::prevalidated]
608    pub fn escape_default(self) -> EscapeDefault {
609        match self {
610            '\t' => EscapeDefault::backslash(ascii::Char::SmallT),
611            '\r' => EscapeDefault::backslash(ascii::Char::SmallR),
612            '\n' => EscapeDefault::backslash(ascii::Char::SmallN),
613            '\\' | '\'' | '\"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
614            '\x20'..='\x7e' => EscapeDefault::printable(self.as_ascii().unwrap()),
615            _ => EscapeDefault::unicode(self),
616        }
617    }
618
619    /// Returns the number of bytes this `char` would need if encoded in UTF-8.
620    ///
621    /// That number of bytes is always between 1 and 4, inclusive.
622    ///
623    /// # Examples
624    ///
625    /// Basic usage:
626    ///
627    /// ```
628    /// let len = 'A'.len_utf8();
629    /// assert_eq!(len, 1);
630    ///
631    /// let len = 'ß'.len_utf8();
632    /// assert_eq!(len, 2);
633    ///
634    /// let len = 'ℝ'.len_utf8();
635    /// assert_eq!(len, 3);
636    ///
637    /// let len = '💣'.len_utf8();
638    /// assert_eq!(len, 4);
639    /// ```
640    ///
641    /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
642    /// would take if each code point was represented as a `char` vs in the `&str` itself:
643    ///
644    /// ```
645    /// // as chars
646    /// let eastern = '東';
647    /// let capital = '京';
648    ///
649    /// // both can be represented as three bytes
650    /// assert_eq!(3, eastern.len_utf8());
651    /// assert_eq!(3, capital.len_utf8());
652    ///
653    /// // as a &str, these two are encoded in UTF-8
654    /// let tokyo = "東京";
655    ///
656    /// let len = eastern.len_utf8() + capital.len_utf8();
657    ///
658    /// // we can see that they take six bytes total...
659    /// assert_eq!(6, tokyo.len());
660    ///
661    /// // ... just like the &str
662    /// assert_eq!(len, tokyo.len());
663    /// ```
664    #[stable(feature = "rust1", since = "1.0.0")]
665    #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
666    #[inline]
667    #[must_use]
668    #[ferrocene::prevalidated]
669    pub const fn len_utf8(self) -> usize {
670        len_utf8(self as u32)
671    }
672
673    /// Returns the number of 16-bit code units this `char` would need if
674    /// encoded in UTF-16.
675    ///
676    /// That number of code units is always either 1 or 2, for unicode scalar values in
677    /// the [basic multilingual plane] or [supplementary planes] respectively.
678    ///
679    /// See the documentation for [`len_utf8()`] for more explanation of this
680    /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
681    ///
682    /// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane
683    /// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes
684    /// [`len_utf8()`]: #method.len_utf8
685    ///
686    /// # Examples
687    ///
688    /// Basic usage:
689    ///
690    /// ```
691    /// let n = 'ß'.len_utf16();
692    /// assert_eq!(n, 1);
693    ///
694    /// let len = '💣'.len_utf16();
695    /// assert_eq!(len, 2);
696    /// ```
697    #[stable(feature = "rust1", since = "1.0.0")]
698    #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
699    #[inline]
700    #[must_use]
701    pub const fn len_utf16(self) -> usize {
702        len_utf16(self as u32)
703    }
704
705    /// Encodes this character as UTF-8 into the provided byte buffer,
706    /// and then returns the subslice of the buffer that contains the encoded character.
707    ///
708    /// # Panics
709    ///
710    /// Panics if the buffer is not large enough.
711    /// A buffer of length four is large enough to encode any `char`.
712    ///
713    /// # Examples
714    ///
715    /// In both of these examples, 'ß' takes two bytes to encode.
716    ///
717    /// ```
718    /// let mut b = [0; 2];
719    ///
720    /// let result = 'ß'.encode_utf8(&mut b);
721    ///
722    /// assert_eq!(result, "ß");
723    ///
724    /// assert_eq!(result.len(), 2);
725    /// ```
726    ///
727    /// A buffer that's too small:
728    ///
729    /// ```should_panic
730    /// let mut b = [0; 1];
731    ///
732    /// // this panics
733    /// 'ß'.encode_utf8(&mut b);
734    /// ```
735    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
736    #[rustc_const_stable(feature = "const_char_encode_utf8", since = "1.83.0")]
737    #[inline]
738    #[ferrocene::prevalidated]
739    pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
740        // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
741        unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
742    }
743
744    /// Encodes this character as native endian UTF-16 into the provided `u16` buffer,
745    /// and then returns the subslice of the buffer that contains the encoded character.
746    ///
747    /// # Panics
748    ///
749    /// Panics if the buffer is not large enough.
750    /// A buffer of length 2 is large enough to encode any `char`.
751    ///
752    /// # Examples
753    ///
754    /// In both of these examples, '𝕊' takes two `u16`s to encode.
755    ///
756    /// ```
757    /// let mut b = [0; 2];
758    ///
759    /// let result = '𝕊'.encode_utf16(&mut b);
760    ///
761    /// assert_eq!(result.len(), 2);
762    /// ```
763    ///
764    /// A buffer that's too small:
765    ///
766    /// ```should_panic
767    /// let mut b = [0; 1];
768    ///
769    /// // this panics
770    /// '𝕊'.encode_utf16(&mut b);
771    /// ```
772    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
773    #[rustc_const_stable(feature = "const_char_encode_utf16", since = "1.84.0")]
774    #[inline]
775    pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
776        encode_utf16_raw(self as u32, dst)
777    }
778
779    /// Returns `true` if this `char` has the `Alphabetic` property.
780    ///
781    /// `Alphabetic` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
782    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
783    ///
784    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G32524
785    /// [specified]: https://www.unicode.org/reports/tr44/#Alphabetic
786    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
787    ///
788    /// # Examples
789    ///
790    /// Basic usage:
791    ///
792    /// ```
793    /// assert!('a'.is_alphabetic());
794    /// assert!('京'.is_alphabetic());
795    ///
796    /// let c = '💝';
797    /// // love is many things, but it is not alphabetic
798    /// assert!(!c.is_alphabetic());
799    /// ```
800    #[must_use]
801    #[stable(feature = "rust1", since = "1.0.0")]
802    #[inline]
803    pub fn is_alphabetic(self) -> bool {
804        match self {
805            'a'..='z' | 'A'..='Z' => true,
806            '\0'..='\u{A9}' => false,
807            _ => unicode::Alphabetic(self),
808        }
809    }
810
811    /// Returns `true` if this `char` has the `Cased` property.
812    /// A character is cased if and only if it is uppercase, lowercase, or titlecase.
813    ///
    /// `Cased` is [described] in Chapter 3 (Conformance) of the Unicode Standard, and
815    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
816    ///
817    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G44595
818    /// [specified]: https://www.unicode.org/reports/tr44/#Cased
819    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
820    ///
821    /// # Examples
822    ///
823    /// Basic usage:
824    ///
825    /// ```
826    /// #![feature(titlecase)]
827    /// assert!('A'.is_cased());
828    /// assert!('a'.is_cased());
829    /// assert!(!'京'.is_cased());
830    /// ```
831    #[must_use]
832    #[unstable(feature = "titlecase", issue = "153892")]
833    #[inline]
834    pub fn is_cased(self) -> bool {
835        match self {
836            'a'..='z' | 'A'..='Z' => true,
837            '\0'..='\u{A9}' => false,
838            _ => unicode::Lowercase(self) || unicode::Uppercase(self) || unicode::Lt(self),
839        }
840    }
841
842    /// Returns the case of this character:
843    /// [`Some(CharCase::Upper)`][`CharCase::Upper`] if [`self.is_uppercase()`][`char::is_uppercase`],
844    /// [`Some(CharCase::Lower)`][`CharCase::Lower`] if [`self.is_lowercase()`][`char::is_lowercase`],
845    /// [`Some(CharCase::Title)`][`CharCase::Title`] if [`self.is_titlecase()`][`char::is_titlecase`], and
846    /// `None` if [`!self.is_cased()`][`char::is_cased`].
847    ///
848    /// # Examples
849    ///
850    /// ```
851    /// #![feature(titlecase)]
852    /// use core::char::CharCase;
853    /// assert_eq!('a'.case(), Some(CharCase::Lower));
854    /// assert_eq!('δ'.case(), Some(CharCase::Lower));
855    /// assert_eq!('A'.case(), Some(CharCase::Upper));
856    /// assert_eq!('Δ'.case(), Some(CharCase::Upper));
857    /// assert_eq!('Dž'.case(), Some(CharCase::Title));
858    /// assert_eq!('中'.case(), None);
859    /// ```
860    #[must_use]
861    #[unstable(feature = "titlecase", issue = "153892")]
862    #[inline]
863    pub fn case(self) -> Option<CharCase> {
864        match self {
865            'a'..='z' => Some(CharCase::Lower),
866            'A'..='Z' => Some(CharCase::Upper),
867            '\0'..='\u{A9}' => None,
868            _ if unicode::Lowercase(self) => Some(CharCase::Lower),
869            _ if unicode::Uppercase(self) => Some(CharCase::Upper),
870            _ if unicode::Lt(self) => Some(CharCase::Title),
871            _ => None,
872        }
873    }
874
875    /// Returns `true` if this `char` has the `Lowercase` property.
876    ///
877    /// `Lowercase` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
878    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
879    ///
880    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
881    /// [specified]: https://www.unicode.org/reports/tr44/#Lowercase
882    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
883    ///
884    /// # Examples
885    ///
886    /// Basic usage:
887    ///
888    /// ```
889    /// assert!('a'.is_lowercase());
890    /// assert!('δ'.is_lowercase());
891    /// assert!(!'A'.is_lowercase());
892    /// assert!(!'Δ'.is_lowercase());
893    ///
894    /// // The various Chinese scripts and punctuation do not have case, and so:
895    /// assert!(!'中'.is_lowercase());
896    /// assert!(!' '.is_lowercase());
897    /// ```
898    ///
899    /// In a const context:
900    ///
901    /// ```
902    /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
903    /// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
904    /// ```
905    #[must_use]
906    #[stable(feature = "rust1", since = "1.0.0")]
907    #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
908    #[inline]
909    pub const fn is_lowercase(self) -> bool {
910        match self {
911            'a'..='z' => true,
912            '\0'..='\u{A9}' => false,
913            _ => unicode::Lowercase(self),
914        }
915    }
916
917    /// Returns `true` if this `char` is in the general category for titlecase letters.
918    /// Conceptually, these characters consist of an uppercase portion followed by a lowercase portion.
919    ///
920    /// Titlecase letters (code points with the general category of `Lt`) are [described] in Chapter 4
921    /// (Character Properties) of the Unicode Standard, and [specified] in the Unicode Character
922    /// Database [`UnicodeData.txt`].
923    ///
924    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G124722
925    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
926    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
927    ///
928    /// # Examples
929    ///
930    /// Basic usage:
931    ///
932    /// ```
933    /// #![feature(titlecase)]
934    /// assert!('Dž'.is_titlecase());
935    /// assert!('ῼ'.is_titlecase());
936    /// assert!(!'D'.is_titlecase());
937    /// assert!(!'z'.is_titlecase());
938    /// assert!(!'中'.is_titlecase());
939    /// assert!(!' '.is_titlecase());
940    /// ```
941    #[must_use]
942    #[unstable(feature = "titlecase", issue = "153892")]
943    #[inline]
944    pub fn is_titlecase(self) -> bool {
945        match self {
946            '\0'..='\u{01C4}' => false,
947            _ => unicode::Lt(self),
948        }
949    }
950
951    /// Returns `true` if this `char` has the `Uppercase` property.
952    ///
953    /// `Uppercase` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
954    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
955    ///
956    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
957    /// [specified]: https://www.unicode.org/reports/tr44/#Uppercase
958    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
959    ///
960    /// # Examples
961    ///
962    /// Basic usage:
963    ///
964    /// ```
965    /// assert!(!'a'.is_uppercase());
966    /// assert!(!'δ'.is_uppercase());
967    /// assert!('A'.is_uppercase());
968    /// assert!('Δ'.is_uppercase());
969    ///
970    /// // The various Chinese scripts and punctuation do not have case, and so:
971    /// assert!(!'中'.is_uppercase());
972    /// assert!(!' '.is_uppercase());
973    /// ```
974    ///
975    /// In a const context:
976    ///
977    /// ```
978    /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
979    /// assert!(CAPITAL_DELTA_IS_UPPERCASE);
980    /// ```
981    #[must_use]
982    #[stable(feature = "rust1", since = "1.0.0")]
983    #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
984    #[inline]
985    pub const fn is_uppercase(self) -> bool {
986        match self {
987            'A'..='Z' => true,
988            '\0'..='\u{BF}' => false,
989            _ => unicode::Uppercase(self),
990        }
991    }
992
993    /// Returns `true` if this `char` has one of the general categories for numbers.
994    ///
995    /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
996    /// characters, and `No` for other numeric characters) are [specified] in the Unicode Character
997    /// Database [`UnicodeData.txt`].
998    ///
999    /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'.
1000    /// If you want everything including characters with overlapping purposes, then you might want to use
1001    /// a Unicode or language-processing library that exposes the appropriate character properties
1002    /// (e.g. [`Numeric_Type`]) instead of looking at the Unicode categories.
1003    ///
1004    /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
1005    /// `is_ascii_digit` or `is_digit` instead.
1006    ///
1007    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1008    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1009    /// [`Numeric_Type`]: https://www.unicode.org/reports/tr44/#Numeric_Type
1010    ///
1011    /// # Examples
1012    ///
1013    /// Basic usage:
1014    ///
1015    /// ```
1016    /// assert!('٣'.is_numeric());
1017    /// assert!('7'.is_numeric());
1018    /// assert!('৬'.is_numeric());
1019    /// assert!('¾'.is_numeric());
1020    /// assert!('①'.is_numeric());
1021    /// assert!(!'K'.is_numeric());
1022    /// assert!(!'و'.is_numeric());
1023    /// assert!(!'藏'.is_numeric());
1024    /// assert!(!'三'.is_numeric());
1025    /// ```
1026    #[must_use]
1027    #[stable(feature = "rust1", since = "1.0.0")]
1028    #[inline]
1029    pub fn is_numeric(self) -> bool {
1030        match self {
1031            '0'..='9' => true,
1032            '\0'..='\u{B1}' => false,
1033            _ => unicode::N(self),
1034        }
1035    }
1036
1037    /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
1038    ///
1039    /// [`is_alphabetic()`]: Self::is_alphabetic
1040    /// [`is_numeric()`]: Self::is_numeric
1041    ///
1042    /// # Examples
1043    ///
1044    /// Basic usage:
1045    ///
1046    /// ```
1047    /// assert!('٣'.is_alphanumeric());
1048    /// assert!('7'.is_alphanumeric());
1049    /// assert!('৬'.is_alphanumeric());
1050    /// assert!('¾'.is_alphanumeric());
1051    /// assert!('①'.is_alphanumeric());
1052    /// assert!('K'.is_alphanumeric());
1053    /// assert!('و'.is_alphanumeric());
1054    /// assert!('藏'.is_alphanumeric());
1055    /// ```
1056    #[must_use]
1057    #[stable(feature = "rust1", since = "1.0.0")]
1058    #[inline]
1059    pub fn is_alphanumeric(self) -> bool {
1060        match self {
1061            'a'..='z' | 'A'..='Z' | '0'..='9' => true,
1062            '\0'..='\u{A9}' => false,
1063            _ => unicode::Alphabetic(self) || unicode::N(self),
1064        }
1065    }
1066
1067    /// Returns `true` if this `char` has the `White_Space` property.
1068    ///
1069    /// `White_Space` is [specified] in the Unicode Character Database [`PropList.txt`].
1070    ///
1071    /// [specified]: https://www.unicode.org/reports/tr44/#White_Space
1072    /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
1073    ///
1074    /// # Examples
1075    ///
1076    /// Basic usage:
1077    ///
1078    /// ```
1079    /// assert!(' '.is_whitespace());
1080    ///
1081    /// // line break
1082    /// assert!('\n'.is_whitespace());
1083    ///
1084    /// // a non-breaking space
1085    /// assert!('\u{A0}'.is_whitespace());
1086    ///
1087    /// assert!(!'越'.is_whitespace());
1088    /// ```
1089    #[must_use]
1090    #[stable(feature = "rust1", since = "1.0.0")]
1091    #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
1092    #[inline]
1093    #[ferrocene::prevalidated]
1094    pub const fn is_whitespace(self) -> bool {
1095        match self {
1096            ' ' | '\x09'..='\x0d' => true,
1097            '\0'..='\u{84}' => false,
1098            _ => unicode::White_Space(self),
1099        }
1100    }
1101
1102    /// Returns `true` if this `char` has the general category for control codes.
1103    ///
1104    /// Control codes (code points with the general category of `Cc`) are [described] in Chapter 23
1105    /// (Special Areas and Format Characters) of the Unicode Standard, and [specified] in the Unicode Character
1106    /// Database [`UnicodeData.txt`]. The full set of Unicode control codes is
1107    /// `'\0'..='\x1f' | '\x7f'..='\u{9f}'`, and will never change.
1108    ///
1109    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-23/#G20365
1110    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1111    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1112    ///
1113    /// # Examples
1114    ///
1115    /// Basic usage:
1116    ///
1117    /// ```
1118    /// assert!('\t'.is_control());
1119    /// assert!('\n'.is_control());
1120    /// assert!('\u{9C}'.is_control()); // STRING TERMINATOR
1121    /// assert!(!'q'.is_control());
1122    /// ```
1123    #[ferrocene::prevalidated]
1124    #[must_use]
1125    #[stable(feature = "rust1", since = "1.0.0")]
1126    #[rustc_const_stable(feature = "const_is_control", since = "1.97.0")]
1127    #[inline]
1128    pub const fn is_control(self) -> bool {
1129        // According to
1130        // https://www.unicode.org/policies/stability_policy.html#Property_Value,
1131        // the set of codepoints in `Cc` will never change.
1132        // So we can just hard-code the patterns to match against instead of using a table.
1133        matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
1134    }
1135
1136    /// Returns `true` if this `char` has the general category for [private-use characters].
1137    /// These characters do not have an interpretation specified by Unicode; individual programs
1138    /// and users are free to assign them whatever meaning they like.
1139    ///
1140    /// [private-use characters]: https://www.unicode.org/faq/private_use#private_use
1141    ///
1142    /// Private-use characters (code points with the general category of `Co`) are [described] in Chapter 23
1143    /// (Special Areas and Format Characters) of the Unicode Standard, and [specified] in the
1144    /// Unicode Character Database [`UnicodeData.txt`]. The full set of private-use characters is
1145    /// `'\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}'`,
1146    /// and will never change.
1147    ///
1148    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-23/#G19184
1149    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1150    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1151    ///
1152    #[ferrocene::prevalidated]
1153    #[must_use]
1154    #[unstable(feature = "char_unassigned_private_use", issue = "158322")]
1155    #[inline]
1156    pub const fn is_private_use(self) -> bool {
1157        // According to
1158        // https://www.unicode.org/policies/stability_policy.html#Property_Value,
1159        // the set of codepoints in `Co` will never change.
1160        // So we can just hard-code the patterns to match against instead of using a table.
1161        matches!(self, '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}')
1162    }
1163
1164    /// Returns `true` if this `char` has the general category for format control characters.
1165    ///
1166    /// Format controls (code points with the general category of `Cf`) are [described] in Chapter 4
1167    /// (Character Properties) of the Unicode Standard, and [specified] in the Unicode Character
1168    /// Database [`UnicodeData.txt`].
1169    ///
1170    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G134153
1171    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1172    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1173    ///
1174    /// # Examples
1175    ///
1176    /// Basic usage:
1177    ///
1178    /// ```ignore(private)
1179    /// assert!('\u{AD}'.is_format_control()); // SOFT HYPHEN
1180    /// assert!('\u{200B}'.is_format_control()); // ZERO WIDTH SPACE
1181    /// assert!('\u{E0041}'.is_format_control()); // TAG LATIN CAPITAL LETTER A
1182    /// assert!('۝'.is_format_control()); // ARABIC END OF AYAH
1183    /// assert!('𓐲'.is_format_control()); // EGYPTIAN HIEROGLYPH INSERT AT TOP START
1184    /// assert!(!'q'.is_format_control());
1185    /// ```
1186    #[ferrocene::prevalidated]
1187    #[must_use]
1188    #[inline]
1189    fn is_format_control(self) -> bool {
1190        self > '\u{AC}' && unicode::Cf(self)
1191    }
1192
1193    /// Returns `true` if this `char` has been assigned a meaning by Unicode, as of
1194    /// [`UNICODE_VERSION`].
1195    ///
1196    /// [`UNICODE_VERSION`]: Self::UNICODE_VERSION
1197    ///
1198    /// Many of Unicode's [stability policies] apply only to assigned characters.
1199    ///
1200    /// [stability policies]: https://www.unicode.org/policies/stability_policy.html
1201    ///
1202    /// Currently unassigned characters (characters for which this method returns `false`)
1203    /// may have a meaning assigned in a future version of Unicode,
1204    /// except for the 66 [noncharacters] which will never be assigned a meaning.
1205    ///
1206    /// [noncharacters]: https://www.unicode.org/faq/private_use.html#noncharacters
1207    ///
1208    /// A character is considered assigned if it is present in [`UnicodeData.txt`].
1209    /// Unassigned characters have general category `Cn`, as [described] in Chapter 4
1210    /// (Character Properties) of the Unicode Standard.
1211    ///
1212    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1213    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G134153
1214    ///
1215    /// # Examples
1216    ///
1217    /// Basic usage:
1218    ///
1219    /// ```
1220    /// #![feature(char_unassigned_private_use)]
1221    /// assert!('γ'.is_assigned()); // once a character is assigned, it stays assigned forever
1222    /// assert!(!'\u{FFFE}'.is_assigned()); // noncharacter, will never be assigned
1223    ///
1224    /// // Not currently assigned, but may be in the future,
1225    /// // so we shouldn't rely on the current status
1226    /// /* assert!(!'\u{7AAAA}'.is_assigned()); */
1227    /// ```
1228    #[ferrocene::prevalidated]
1229    #[must_use]
1230    #[unstable(feature = "char_unassigned_private_use", issue = "158322")]
1231    #[inline]
1232    pub fn is_assigned(self) -> bool {
1233        match self {
1234            '\0'..='\u{377}' => true,
1235            '\u{378}'..='\u{3FFFD}' => !unicode::Cn_planes_0_3(self),
1236            // Assigned character ranges in planes 4 and above.
1237            // `src/tools/unicode-table-generator/src/main.rs` asserts that this is correct
1238            '\u{E0001}'
1239            | '\u{E0020}'..='\u{E007F}'
1240            | '\u{E0100}'..='\u{E01EF}'
1241            | '\u{F0000}'..='\u{FFFFD}'
1242            | '\u{100000}'..='\u{10FFFD}' => true,
1243            _ => false,
1244        }
1245    }
1246
1247    /// Returns `true` if this `char` has the `Default_Ignorable_Code_Point` property.
1248    /// These characters [should be displayed as invisible in fallback rendering](https://www.unicode.org/faq/unsup_char#3).
1249    ///
1250    /// `Default_Ignorable_Code_Point` is [described] in Chapter 5 (Implementation Guidelines) of the Unicode Standard,
1251    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1252    ///
1253    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-5/#G40120
1254    /// [specified]: https://www.unicode.org/reports/tr44/#Default_Ignorable_Code_Point
1255    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1256    ///
1257    /// # Examples
1258    ///
1259    /// Basic usage:
1260    ///
1261    /// ```ignore(private)
1262    /// assert!('\u{AD}'.is_default_ignorable()); // SOFT HYPHEN
1263    /// assert!('\u{115F}'.is_default_ignorable()); // HANGUL CHOSEONG FILLER
1264    /// assert!('\u{200B}'.is_default_ignorable()); // ZERO WIDTH SPACE
1265    /// assert!('\u{E0041}'.is_default_ignorable()); // TAG LATIN CAPITAL LETTER A
1266    /// assert!(!'۝'.is_default_ignorable()); // ARABIC END OF AYAH
1267    /// assert!(!'𓐲'.is_default_ignorable()); // EGYPTIAN HIEROGLYPH INSERT AT TOP START
1268    /// assert!(!' '.is_default_ignorable());
1269    /// assert!(!'\n'.is_default_ignorable());
1270    /// assert!(!'\0'.is_default_ignorable());
1271    /// assert!(!'q'.is_default_ignorable());
1272    #[ferrocene::prevalidated]
1273    #[must_use]
1274    #[inline]
1275    fn is_default_ignorable(self) -> bool {
1276        self > '\u{AC}' && unicode::Default_Ignorable_Code_Point(self)
1277    }
1278
1279    /// Returns `true` if this `char` has the `Grapheme_Extend` property.
1280    ///
1281    /// `Grapheme_Extend` is [described] in Chapter 3 (Conformance) of the Unicode Standard,
1282    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1283    ///
1284    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G41165
1285    /// [specified]: https://www.unicode.org/reports/tr44/#Grapheme_Extend
1286    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1287    #[ferrocene::prevalidated]
1288    #[must_use]
1289    #[inline]
1290    fn is_grapheme_extender(self) -> bool {
1291        self > '\u{02FF}' && unicode::Grapheme_Extend(self)
1292    }
1293
1294    /// Returns `true` if this `char` has the `Case_Ignorable` property. This narrow-use property
1295    /// is used to implement context-dependent casing for the Greek letter sigma (uppercase 'Σ'),
1296    /// which has two lowercase forms.
1297    ///
1298    /// `Case_Ignorable` is [described] in Chapter 3 (Conformance) of the Unicode Core Specification,
1299    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1300    /// See those resources, as well as [`to_lowercase()`]'s documentation, for more information.
1301    ///
1302    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
1303    /// [specified]: https://www.unicode.org/reports/tr44/#Case_Ignorable
1304    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1305    /// [`to_lowercase()`]: Self::to_lowercase()
1306    #[must_use]
1307    #[inline]
1308    #[unstable(feature = "case_ignorable", issue = "154848")]
1309    pub fn is_case_ignorable(self) -> bool {
1310        if self.is_ascii() {
1311            matches!(self, '\'' | '.' | ':' | '^' | '`')
1312        } else {
1313            unicode::Case_Ignorable(self)
1314        }
1315    }
1316
1317    /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
1318    /// `char`s.
1319    ///
1320    /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
1321    ///
1322    /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
1323    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1324    ///
1325    /// [ucd]: https://www.unicode.org/reports/tr44/
1326    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1327    ///
1328    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1329    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1330    ///
1331    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1332    /// is independent of context and language. See [below](#notes-on-context-and-locale)
1333    /// for more information.
1334    ///
1335    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1336    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1337    ///
1338    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1339    ///
1340    /// # Examples
1341    ///
1342    /// As an iterator:
1343    ///
1344    /// ```
1345    /// for c in 'İ'.to_lowercase() {
1346    ///     print!("{c}");
1347    /// }
1348    /// println!();
1349    /// ```
1350    ///
1351    /// Using `println!` directly:
1352    ///
1353    /// ```
1354    /// println!("{}", 'İ'.to_lowercase());
1355    /// ```
1356    ///
1357    /// Both are equivalent to:
1358    ///
1359    /// ```
1360    /// println!("i\u{307}");
1361    /// ```
1362    ///
1363    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1364    ///
1365    /// ```
1366    /// assert_eq!('C'.to_lowercase().to_string(), "c");
1367    ///
1368    /// // Sometimes the result is more than one character:
1369    /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
1370    ///
1371    /// // Characters that do not have both uppercase and lowercase
1372    /// // convert into themselves.
1373    /// assert_eq!('山'.to_lowercase().to_string(), "山");
1374    /// ```
1375    /// # Notes on context and locale
1376    ///
1377    /// As stated earlier, this method does not take into account language or context.
1378    /// Below is a non-exhaustive list of situations where this can be relevant.
1379    /// If you need to handle locale-dependent casing in your code, consider using
1380    /// an external crate, like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1381    /// which is developed by Unicode.
1382    ///
1383    /// ## Greek sigma
1384    ///
1385    /// In Greek, the letter sigma (uppercase 'Σ') has two lowercase forms:
1386    /// 'σ' which is used in most situations, and 'ς' which appears only
1387    /// at the end of a word. [`char::to_lowercase()`] always uses the first form:
1388    ///
1389    /// ```
1390    /// assert_eq!('Σ'.to_lowercase().to_string(), "σ");
1391    /// ```
1392    ///
1393    /// `str::to_lowercase()` (only available with the `alloc` crate)
1394    /// *does* properly handle this contextual mapping,
1395    /// so prefer using that method if you can. Alternatively, you can use
1396    /// [`is_cased()`] and [`is_case_ignorable()`] to implement it yourself.
1397    /// See `Final_Sigma` in [Table 3.17] of the Unicode Standard,
1398    /// along with [`SpecialCasing.txt`], for more details.
1399    ///
1400    /// [`is_cased()`]: Self::is_cased()
1401    /// [`is_case_ignorable()`]: Self::is_case_ignorable()
1402    /// [Table 3.17]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G54277
1403    ///
1404    /// ## Turkish and Azeri I/ı/İ/i
1405    ///
1406    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1407    ///
1408    /// * 'Dotless': I / ı, sometimes written ï
1409    /// * 'Dotted': İ / i
1410    ///
1411    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
1412    ///
1413    /// ```
1414    /// let lower_i = 'I'.to_lowercase().to_string();
1415    /// ```
1416    ///
1417    /// `'I'`'s correct lowercase relies on the language of the text: if we're
1418    /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
1419    /// be `"ı"`. `to_lowercase()` does not take this into account, and so:
1420    ///
1421    /// ```
1422    /// let lower_i = 'I'.to_lowercase().to_string();
1423    ///
1424    /// assert_eq!(lower_i, "i");
1425    /// ```
1426    ///
1427    /// holds across languages.
1428    ///
1429    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1430    #[must_use = "this returns the lowercased character as a new iterator, \
1431                  without modifying the original"]
1432    #[stable(feature = "rust1", since = "1.0.0")]
1433    #[inline]
1434    pub fn to_lowercase(self) -> ToLowercase {
1435        ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
1436    }
1437
1438    /// Returns an iterator that yields the titlecase mapping of this `char` as one or more
1439    /// `char`s.
1440    ///
1441    /// This is usually, but not always, equivalent to the uppercase mapping
1442    /// returned by [`to_uppercase()`]. Prefer this method when seeking to capitalize
1443    /// Only The First Letter of a word, but use [`to_uppercase()`] for ALL CAPS.
1444    /// See [below](#difference-from-uppercase) for a thorough explanation
1445    /// of the difference between the two methods.
1446    ///
1447    /// If this `char` does not have a titlecase mapping, the iterator yields the same `char`.
1448    ///
1449    /// If this `char` has a one-to-one titlecase mapping given by the [Unicode Character
1450    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1451    ///
1452    /// [ucd]: https://www.unicode.org/reports/tr44/
1453    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1454    ///
1455    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1456    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1457    ///
1458    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1459    ///
1460    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1461    /// is independent of context and language. See [below](#note-on-locale)
1462    /// for more information.
1463    ///
1464    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1465    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1466    ///
1467    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1468    ///
1469    /// # Examples
1470    ///
1471    /// As an iterator:
1472    ///
1473    /// ```
1474    /// #![feature(titlecase)]
1475    /// for c in 'ß'.to_titlecase() {
1476    ///     print!("{c}");
1477    /// }
1478    /// println!();
1479    /// ```
1480    ///
1481    /// Using `println!` directly:
1482    ///
1483    /// ```
1484    /// #![feature(titlecase)]
1485    /// println!("{}", 'ß'.to_titlecase());
1486    /// ```
1487    ///
1488    /// Both are equivalent to:
1489    ///
1490    /// ```
1491    /// println!("Ss");
1492    /// ```
1493    ///
1494    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1495    ///
1496    /// ```
1497    /// #![feature(titlecase)]
1498    /// assert_eq!('c'.to_titlecase().to_string(), "C");
1499    /// assert_eq!('ა'.to_titlecase().to_string(), "ა");
1500    /// assert_eq!('dž'.to_titlecase().to_string(), "Dž");
1501    /// assert_eq!('ᾨ'.to_titlecase().to_string(), "ᾨ");
1502    ///
1503    /// // Sometimes the result is more than one character:
1504    /// assert_eq!('ß'.to_titlecase().to_string(), "Ss");
1505    ///
1506    /// // Characters that do not have separate cased forms
1507    /// // convert into themselves.
1508    /// assert_eq!('山'.to_titlecase().to_string(), "山");
1509    /// ```
1510    ///
1511    /// # Difference from uppercase
1512    ///
1513    /// Currently, there are three classes of characters where [`to_uppercase()`]
1514    /// and `to_titlecase()` give different results:
1515    ///
1516    /// ## Georgian script
1517    ///
1518    /// Each letter in the modern Georgian alphabet can be written in one of two forms:
1519    /// the typical lowercase-like "mkhedruli" form, and a variant uppercase-like "mtavruli"
1520    /// form. However, unlike uppercase in most cased scripts, mtavruli is not typically used
1521    /// to start sentences, denote proper nouns, or for any other purpose
1522    /// in running text. It is instead confined to titles and headings, which are written entirely
1523    /// in mtavruli. For this reason, [`to_uppercase()`] applied to a Georgian letter
1524    /// will return the mtavruli form, but `to_titlecase()` will return the mkhedruli form.
1525    ///
1526    /// ```
1527    /// #![feature(titlecase)]
1528    /// let ani = 'ა'; // First letter of the Georgian alphabet, in mkhedruli form
1529    ///
1530    /// // Titlecasing mkhedruli maps it to itself...
1531    /// assert_eq!(ani.to_titlecase().to_string(), ani.to_string());
1532    ///
1533    /// // but uppercasing it maps it to mtavruli
1534    /// assert_eq!(ani.to_uppercase().to_string(), "Ა");
1535    /// ```
1536    ///
1537    /// ## Compatibility digraphs for Latin-alphabet Serbo-Croatian
1538    ///
1539    /// The standard Latin alphabet for the Serbo-Croatian language
1540    /// (Bosnian, Croatian, Montenegrin, and Serbian) contains
1541    /// three digraphs: Dž, Lj, and Nj. These are usually represented as
1542    /// two characters. However, for compatibility with older character sets,
1543    /// Unicode includes single-character versions of these digraphs.
1544    /// Each has an uppercase, titlecase, and lowercase version:
1545    ///
1546    /// - `'DŽ'`, `'Dž'`, `'dž'`
1547    /// - `'LJ'`, `'Lj'`, `'lj'`
1548    /// - `'NJ'`, `'Nj'`, `'nj'`
1549    ///
1550    /// Unicode additionally encodes a casing triad for the Dz digraph
1551    /// without the caron: `'DZ'`, `'Dz'`, `'dz'`.
1552    ///
1553    /// ## Iota-subscripted Greek vowels
1554    ///
1555    /// In ancient Greek, the long vowels alpha (α), eta (η), and omega (ω)
1556    /// were sometimes followed by an iota (ι), forming a diphthong. Over time,
1557    /// the diphthong pronunciation was slowly lost, with the iota becoming mute.
1558    /// Eventually, the ι disappeared from the spelling as well.
1559    /// However, there remains a need to represent ancient texts faithfully.
1560    ///
1561    /// Modern editions of ancient Greek texts commonly use a reduced-sized
1562    /// ι symbol to denote mute iotas, while distinguishing them from ιs
1563    /// which continued to affect pronunciation. The exact standard differs
1564    /// between different publications. Some render the mute ι below its associated
1565    /// vowel (subscript), while others place it to the right of said vowel (adscript).
1566    /// The interaction of mute ι symbols with casing also varies.
1567    ///
1568    /// The Unicode Standard, for its default casing rules, chose to make lowercase
1569    /// Greek vowels with iota subscript (e.g. `'ᾠ'`) titlecase to the uppercase vowel
1570    /// with iota subscript (`'ᾨ'`) but uppercase to the uppercase vowel followed by
1571    /// full-size uppercase iota (`"ὨΙ"`). This is just one convention among many
1572    /// in common use, but it is the one Unicode settled on,
1573    /// so it is what this method does also.
1574    ///
1575    /// # Note on locale
1576    ///
1577    /// As stated above, this method is locale-insensitive.
1578    /// If you need locale support, consider using an external crate,
1579    /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1580    /// which is developed by Unicode. A description of one common
1581    /// locale-dependent casing issue follows (there are others):
1582    ///
1583    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1584    ///
1585    /// * 'Dotless': I / ı, sometimes written ï
1586    /// * 'Dotted': İ / i
1587    ///
1588    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
1589    ///
1590    /// ```
1591    /// #![feature(titlecase)]
1592    /// let upper_i = 'i'.to_titlecase().to_string();
1593    /// ```
1594    ///
1595    /// `'i'`'s correct titlecase relies on the language of the text: if we're
1596    /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
1597    /// be `"İ"`. `to_titlecase()` does not take this into account, and so:
1598    ///
1599    /// ```
1600    /// #![feature(titlecase)]
1601    /// let upper_i = 'i'.to_titlecase().to_string();
1602    ///
1603    /// assert_eq!(upper_i, "I");
1604    /// ```
1605    ///
1606    /// holds across languages.
1607    ///
1608    /// [`to_uppercase()`]: Self::to_uppercase()
1609    #[must_use = "this returns the titlecased character as a new iterator, \
1610                  without modifying the original"]
1611    #[unstable(feature = "titlecase", issue = "153892")]
1612    #[inline]
1613    pub fn to_titlecase(self) -> ToTitlecase {
1614        ToTitlecase(CaseMappingIter::new(conversions::to_title(self)))
1615    }
1616
1617    /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
1618    /// `char`s.
1619    ///
1620    /// Prefer this method when converting a word into ALL CAPS, but consider [`to_titlecase()`]
1621    /// instead if you seek to capitalize Only The First Letter. See that method's documentation
1622    /// for more information on the difference between the two.
1623    ///
1624    /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
1625    ///
1626    /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
1627    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1628    ///
1629    /// [ucd]: https://www.unicode.org/reports/tr44/
1630    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1631    ///
1632    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1633    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1634    ///
1635    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1636    ///
1637    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1638    /// is independent of context and language. See [below](#note-on-locale)
1639    /// for more information.
1640    ///
1641    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1642    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1643    ///
1644    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1645    ///
1646    /// # Examples
1647    ///
1648    /// `'ſt'` (U+FB05) is a single Unicode code point (a ligature) that maps to "ST" in uppercase.
1649    ///
1650    /// As an iterator:
1651    ///
1652    /// ```
1653    /// for c in 'ſt'.to_uppercase() {
1654    ///     print!("{c}");
1655    /// }
1656    /// println!();
1657    /// ```
1658    ///
1659    /// Using `println!` directly:
1660    ///
1661    /// ```
1662    /// println!("{}", 'ſt'.to_uppercase());
1663    /// ```
1664    ///
1665    /// Both are equivalent to:
1666    ///
1667    /// ```
1668    /// println!("ST");
1669    /// ```
1670    ///
1671    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1672    ///
1673    /// ```
1674    /// assert_eq!('c'.to_uppercase().to_string(), "C");
1675    /// assert_eq!('ა'.to_uppercase().to_string(), "Ა");
1676    /// assert_eq!('dž'.to_uppercase().to_string(), "DŽ");
1677    ///
1678    /// // Sometimes the result is more than one character:
1679    /// assert_eq!('ſt'.to_uppercase().to_string(), "ST");
1680    /// assert_eq!('ᾨ'.to_uppercase().to_string(), "ὨΙ");
1681    ///
1682    /// // Characters that do not have both uppercase and lowercase
1683    /// // convert into themselves.
1684    /// assert_eq!('山'.to_uppercase().to_string(), "山");
1685    /// ```
1686    ///
1687    /// # Note on locale
1688    ///
1689    /// As stated above, this method is locale-insensitive.
1690    /// If you need locale support, consider using an external crate,
1691    /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1692    /// which is developed by Unicode. A description of one common
1693    /// locale-dependent casing issue follows (there are others):
1694    ///
1695    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1696    ///
1697    /// * 'Dotless': I / ı, sometimes written ï
1698    /// * 'Dotted': İ / i
1699    ///
1700    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
1701    ///
1702    /// ```
1703    /// let upper_i = 'i'.to_uppercase().to_string();
1704    /// ```
1705    ///
1706    /// `'i'`'s correct uppercase relies on the language of the text: if we're
1707    /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
1708    /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
1709    ///
1710    /// ```
1711    /// let upper_i = 'i'.to_uppercase().to_string();
1712    ///
1713    /// assert_eq!(upper_i, "I");
1714    /// ```
1715    ///
1716    /// holds across languages.
1717    ///
1718    /// [`to_titlecase()`]: Self::to_titlecase()
1719    #[must_use = "this returns the uppercased character as a new iterator, \
1720                  without modifying the original"]
1721    #[stable(feature = "rust1", since = "1.0.0")]
1722    #[inline]
1723    pub fn to_uppercase(self) -> ToUppercase {
1724        ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
1725    }
1726
1727    /// Returns an iterator that yields the case folding of this `char` as one or more
1728    /// `char`s.
1729    ///
1730    /// Case folding is meant to be used when performing case-insensitive string comparisons.
1731    /// Case-folded strings should not usually be exposed directly to users. For most,
1732    /// but not all, characters, the casefold mapping is identical to the lowercase one.
1733    ///
1734    /// This iterator yields the `char`(s) in the common or full case folding for this `char`,
1735    /// as given by the [Unicode Character Database][ucd] [`CaseFolding.txt`].
1736    /// The maximum number of `char`s in a case folding is 3.
1737    ///
1738    /// [ucd]: https://www.unicode.org/reports/tr44/
1739    /// [`CaseFolding.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
1740    ///
1741    ///
1742    /// No [normalization] (e.g. NFC) is performed, so visually and semantically identical characters
1743    /// might still casefold differently. For example, `'ά'` (U+03AC GREEK SMALL LETTER ALPHA WITH TONOS)
1744    /// is considered distinct from `'ά'` (U+1F71 GREEK SMALL LETTER ALPHA WITH OXIA),
1745    /// even though Unicode considers them canonically equivalent.
1746    ///
1747    /// In addition, this method is independent of language/locale,
1748    /// so the special behavior of I/ı/İ/i in Turkish and Azeri is not handled.
1749    ///
1750    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case folding in
1751    /// general and Chapter 3 (Conformance) discusses the default algorithm for case folding.
1752    ///
1753    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1754    ///
1755    /// # Examples
1756    ///
1757    /// The German sharp S `'ß'` (U+DF) is a single Unicode code point
    /// that casefolds to `"ss"`. Its uppercase variant `'ẞ'` (U+1E9E)
1759    /// has the same case-folding.
1760    ///
1761    /// As an iterator:
1762    ///
1763    /// ```
1764    /// #![feature(casefold)]
1765    /// assert!('ß'.to_casefold_unnormalized().eq(['s', 's']));
1766    /// assert!('ẞ'.to_casefold_unnormalized().eq(['s', 's']));
1767    /// ```
1768    ///
1769    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1770    ///
1771    /// ```
1772    /// #![feature(casefold)]
1773    /// assert_eq!('ß'.to_casefold_unnormalized().to_string(), "ss");
1774    /// assert_eq!('ẞ'.to_casefold_unnormalized().to_string(), "ss");
1775    /// ```
1776    ///
1777    /// No [normalization] is performed:
1778    ///
1779    /// ```rust
1780    /// #![feature(casefold)]
1781    /// // These two characters are visually and semantically identical;
1782    /// // Unicode considers them to be canonically equivalent.
1783    /// let alpha_tonos = 'ά';
1784    /// let alpha_oxia = 'ά';
1785    ///
1786    /// // However, they are different codepoints:
1787    /// assert_eq!(alpha_tonos, '\u{03AC}');
1788    /// assert_eq!(alpha_oxia, '\u{1F71}');
1789    ///
1790    /// // Their case-foldings are likewise unequal:
1791    /// assert!(alpha_tonos.to_casefold_unnormalized().eq(['\u{03AC}']));
1792    /// assert!(alpha_oxia.to_casefold_unnormalized().eq(['\u{1F71}']));
1793    /// ```
1794    ///
1795    /// # Note on locale
1796    ///
1797    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1798    ///
1799    /// * 'Dotless': I / ı, sometimes written ï
1800    /// * 'Dotted': İ / i
1801    ///
1802    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
1803    ///
1804    /// ```
1805    /// #![feature(casefold)]
1806    /// let casefold_i = 'I'.to_casefold_unnormalized().to_string();
1807    /// ```
1808    ///
1809    /// `'I'`'s correct case folding relies on the language of the text: if we're
1810    /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
1811    /// be `"ı"`. `to_casefold_unnormalized()` does not take this into account, and so:
1812    ///
1813    /// ```
1814    /// #![feature(casefold)]
1815    /// let casefold_i = 'I'.to_casefold_unnormalized().to_string();
1816    ///
1817    /// assert_eq!(casefold_i, "i");
1818    /// ```
1819    ///
1820    /// holds across languages.
1821    ///
1822    /// [normalization]: https://www.unicode.org/faq/normalization.html
1823    #[must_use = "this returns the case-folded character as a new iterator, \
1824                  without modifying the original"]
1825    #[unstable(feature = "casefold", issue = "154742")]
1826    #[inline]
1827    pub fn to_casefold_unnormalized(self) -> ToCasefold {
1828        ToCasefold(CaseMappingIter::new(conversions::to_casefold(self)))
1829    }
1830
1831    /// Checks if the value is within the ASCII range.
1832    ///
1833    /// # Examples
1834    ///
1835    /// ```
1836    /// let ascii = 'a';
1837    /// let non_ascii = '❤';
1838    ///
1839    /// assert!(ascii.is_ascii());
1840    /// assert!(!non_ascii.is_ascii());
1841    /// ```
1842    #[must_use]
1843    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1844    #[rustc_const_stable(feature = "const_char_is_ascii", since = "1.32.0")]
1845    #[rustc_diagnostic_item = "char_is_ascii"]
1846    #[inline]
1847    #[ferrocene::prevalidated]
1848    pub const fn is_ascii(&self) -> bool {
1849        *self as u32 <= 0x7F
1850    }
1851
1852    /// Returns `Some` if the value is within the ASCII range,
1853    /// or `None` if it's not.
1854    ///
1855    /// This is preferred to [`Self::is_ascii`] when you're passing the value
1856    /// along to something else that can take [`ascii::Char`] rather than
1857    /// needing to check again for itself whether the value is in ASCII.
1858    #[must_use]
1859    #[unstable(feature = "ascii_char", issue = "110998")]
1860    #[inline]
1861    #[ferrocene::prevalidated]
1862    pub const fn as_ascii(&self) -> Option<ascii::Char> {
1863        if self.is_ascii() {
1864            // SAFETY: Just checked that this is ASCII.
1865            Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
1866        } else {
1867            None
1868        }
1869    }
1870
1871    /// Converts this char into an [ASCII character](`ascii::Char`), without
1872    /// checking whether it is valid.
1873    ///
1874    /// # Safety
1875    ///
1876    /// This char must be within the ASCII range, or else this is UB.
1877    #[must_use]
1878    #[unstable(feature = "ascii_char", issue = "110998")]
1879    #[inline]
    pub const unsafe fn as_ascii_unchecked(&self) -> ascii::Char {
        // Check the caller's contract (the char must be ASCII) through the
        // `assert_unsafe_precondition!` machinery before doing the unchecked
        // conversion below.
        assert_unsafe_precondition!(
            check_library_ub,
            "as_ascii_unchecked requires that the char is valid ASCII",
            (it: &char = self) => it.is_ascii()
        );

        // SAFETY: the caller promised that this char is ASCII.
        // For an ASCII char (code point <= 0x7F) the `as u8` cast loses nothing.
        unsafe { ascii::Char::from_u8_unchecked(*self as u8) }
    }
1890
1891    /// Makes a copy of the value in its ASCII upper case equivalent.
1892    ///
1893    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1894    /// but non-ASCII letters are unchanged.
1895    ///
1896    /// To uppercase the value in-place, use [`make_ascii_uppercase()`].
1897    ///
1898    /// To uppercase ASCII characters in addition to non-ASCII characters, use
1899    /// [`to_uppercase()`].
1900    ///
1901    /// # Examples
1902    ///
1903    /// ```
1904    /// let ascii = 'a';
1905    /// let non_ascii = '❤';
1906    ///
1907    /// assert_eq!('A', ascii.to_ascii_uppercase());
1908    /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
1909    /// ```
1910    ///
1911    /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase
1912    /// [`to_uppercase()`]: #method.to_uppercase
1913    #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1914    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1915    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1916    #[inline]
1917    pub const fn to_ascii_uppercase(&self) -> char {
1918        if self.is_ascii_lowercase() {
1919            (*self as u8).ascii_change_case_unchecked() as char
1920        } else {
1921            *self
1922        }
1923    }
1924
1925    /// Makes a copy of the value in its ASCII lower case equivalent.
1926    ///
1927    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1928    /// but non-ASCII letters are unchanged.
1929    ///
1930    /// To lowercase the value in-place, use [`make_ascii_lowercase()`].
1931    ///
1932    /// To lowercase ASCII characters in addition to non-ASCII characters, use
1933    /// [`to_lowercase()`].
1934    ///
1935    /// # Examples
1936    ///
1937    /// ```
1938    /// let ascii = 'A';
1939    /// let non_ascii = '❤';
1940    ///
1941    /// assert_eq!('a', ascii.to_ascii_lowercase());
1942    /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
1943    /// ```
1944    ///
1945    /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase
1946    /// [`to_lowercase()`]: #method.to_lowercase
1947    #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1948    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1949    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1950    #[inline]
1951    pub const fn to_ascii_lowercase(&self) -> char {
1952        if self.is_ascii_uppercase() {
1953            (*self as u8).ascii_change_case_unchecked() as char
1954        } else {
1955            *self
1956        }
1957    }
1958
1959    /// Checks that two values are an ASCII case-insensitive match.
1960    ///
1961    /// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>.
1962    ///
1963    /// # Examples
1964    ///
1965    /// ```
1966    /// let upper_a = 'A';
1967    /// let lower_a = 'a';
1968    /// let lower_z = 'z';
1969    ///
1970    /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1971    /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1972    /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1973    /// ```
1974    ///
1975    /// [to_ascii_lowercase]: #method.to_ascii_lowercase
1976    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1977    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1978    #[inline]
1979    pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool {
1980        self.to_ascii_lowercase() == other.to_ascii_lowercase()
1981    }
1982
1983    /// Converts this type to its ASCII upper case equivalent in-place.
1984    ///
1985    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1986    /// but non-ASCII letters are unchanged.
1987    ///
1988    /// To return a new uppercased value without modifying the existing one, use
1989    /// [`to_ascii_uppercase()`].
1990    ///
1991    /// # Examples
1992    ///
1993    /// ```
1994    /// let mut ascii = 'a';
1995    ///
1996    /// ascii.make_ascii_uppercase();
1997    ///
1998    /// assert_eq!('A', ascii);
1999    /// ```
2000    ///
2001    /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
2002    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2003    #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
2004    #[inline]
2005    pub const fn make_ascii_uppercase(&mut self) {
2006        *self = self.to_ascii_uppercase();
2007    }
2008
2009    /// Converts this type to its ASCII lower case equivalent in-place.
2010    ///
2011    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
2012    /// but non-ASCII letters are unchanged.
2013    ///
2014    /// To return a new lowercased value without modifying the existing one, use
2015    /// [`to_ascii_lowercase()`].
2016    ///
2017    /// # Examples
2018    ///
2019    /// ```
2020    /// let mut ascii = 'A';
2021    ///
2022    /// ascii.make_ascii_lowercase();
2023    ///
2024    /// assert_eq!('a', ascii);
2025    /// ```
2026    ///
2027    /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
2028    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2029    #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
2030    #[inline]
2031    pub const fn make_ascii_lowercase(&mut self) {
2032        *self = self.to_ascii_lowercase();
2033    }
2034
2035    /// Checks if the value is an ASCII alphabetic character:
2036    ///
2037    /// - U+0041 'A' ..= U+005A 'Z', or
2038    /// - U+0061 'a' ..= U+007A 'z'.
2039    ///
2040    /// # Examples
2041    ///
2042    /// ```
2043    /// let uppercase_a = 'A';
2044    /// let uppercase_g = 'G';
2045    /// let a = 'a';
2046    /// let g = 'g';
2047    /// let zero = '0';
2048    /// let percent = '%';
2049    /// let space = ' ';
2050    /// let lf = '\n';
2051    /// let esc = '\x1b';
2052    ///
2053    /// assert!(uppercase_a.is_ascii_alphabetic());
2054    /// assert!(uppercase_g.is_ascii_alphabetic());
2055    /// assert!(a.is_ascii_alphabetic());
2056    /// assert!(g.is_ascii_alphabetic());
2057    /// assert!(!zero.is_ascii_alphabetic());
2058    /// assert!(!percent.is_ascii_alphabetic());
2059    /// assert!(!space.is_ascii_alphabetic());
2060    /// assert!(!lf.is_ascii_alphabetic());
2061    /// assert!(!esc.is_ascii_alphabetic());
2062    /// ```
2063    #[must_use]
2064    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2065    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2066    #[inline]
2067    pub const fn is_ascii_alphabetic(&self) -> bool {
2068        matches!(*self, 'a'..='z' | 'A'..='Z')
2069    }
2070
2071    /// Checks if the value is an ASCII uppercase character:
2072    /// U+0041 'A' ..= U+005A 'Z'.
2073    ///
2074    /// # Examples
2075    ///
2076    /// ```
2077    /// let uppercase_a = 'A';
2078    /// let uppercase_g = 'G';
2079    /// let a = 'a';
2080    /// let g = 'g';
2081    /// let zero = '0';
2082    /// let percent = '%';
2083    /// let space = ' ';
2084    /// let lf = '\n';
2085    /// let esc = '\x1b';
2086    ///
2087    /// assert!(uppercase_a.is_ascii_uppercase());
2088    /// assert!(uppercase_g.is_ascii_uppercase());
2089    /// assert!(!a.is_ascii_uppercase());
2090    /// assert!(!g.is_ascii_uppercase());
2091    /// assert!(!zero.is_ascii_uppercase());
2092    /// assert!(!percent.is_ascii_uppercase());
2093    /// assert!(!space.is_ascii_uppercase());
2094    /// assert!(!lf.is_ascii_uppercase());
2095    /// assert!(!esc.is_ascii_uppercase());
2096    /// ```
2097    #[must_use]
2098    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2099    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2100    #[inline]
2101    pub const fn is_ascii_uppercase(&self) -> bool {
2102        matches!(*self, 'A'..='Z')
2103    }
2104
2105    /// Checks if the value is an ASCII lowercase character:
2106    /// U+0061 'a' ..= U+007A 'z'.
2107    ///
2108    /// # Examples
2109    ///
2110    /// ```
2111    /// let uppercase_a = 'A';
2112    /// let uppercase_g = 'G';
2113    /// let a = 'a';
2114    /// let g = 'g';
2115    /// let zero = '0';
2116    /// let percent = '%';
2117    /// let space = ' ';
2118    /// let lf = '\n';
2119    /// let esc = '\x1b';
2120    ///
2121    /// assert!(!uppercase_a.is_ascii_lowercase());
2122    /// assert!(!uppercase_g.is_ascii_lowercase());
2123    /// assert!(a.is_ascii_lowercase());
2124    /// assert!(g.is_ascii_lowercase());
2125    /// assert!(!zero.is_ascii_lowercase());
2126    /// assert!(!percent.is_ascii_lowercase());
2127    /// assert!(!space.is_ascii_lowercase());
2128    /// assert!(!lf.is_ascii_lowercase());
2129    /// assert!(!esc.is_ascii_lowercase());
2130    /// ```
2131    #[must_use]
2132    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2133    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2134    #[inline]
2135    pub const fn is_ascii_lowercase(&self) -> bool {
2136        matches!(*self, 'a'..='z')
2137    }
2138
2139    /// Checks if the value is an ASCII alphanumeric character:
2140    ///
2141    /// - U+0041 'A' ..= U+005A 'Z', or
2142    /// - U+0061 'a' ..= U+007A 'z', or
2143    /// - U+0030 '0' ..= U+0039 '9'.
2144    ///
2145    /// # Examples
2146    ///
2147    /// ```
2148    /// let uppercase_a = 'A';
2149    /// let uppercase_g = 'G';
2150    /// let a = 'a';
2151    /// let g = 'g';
2152    /// let zero = '0';
2153    /// let percent = '%';
2154    /// let space = ' ';
2155    /// let lf = '\n';
2156    /// let esc = '\x1b';
2157    ///
2158    /// assert!(uppercase_a.is_ascii_alphanumeric());
2159    /// assert!(uppercase_g.is_ascii_alphanumeric());
2160    /// assert!(a.is_ascii_alphanumeric());
2161    /// assert!(g.is_ascii_alphanumeric());
2162    /// assert!(zero.is_ascii_alphanumeric());
2163    /// assert!(!percent.is_ascii_alphanumeric());
2164    /// assert!(!space.is_ascii_alphanumeric());
2165    /// assert!(!lf.is_ascii_alphanumeric());
2166    /// assert!(!esc.is_ascii_alphanumeric());
2167    /// ```
2168    #[must_use]
2169    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2170    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2171    #[inline]
2172    pub const fn is_ascii_alphanumeric(&self) -> bool {
2173        matches!(*self, '0'..='9') | matches!(*self, 'A'..='Z') | matches!(*self, 'a'..='z')
2174    }
2175
2176    /// Checks if the value is an ASCII decimal digit:
2177    /// U+0030 '0' ..= U+0039 '9'.
2178    ///
2179    /// # Examples
2180    ///
2181    /// ```
2182    /// let uppercase_a = 'A';
2183    /// let uppercase_g = 'G';
2184    /// let a = 'a';
2185    /// let g = 'g';
2186    /// let zero = '0';
2187    /// let percent = '%';
2188    /// let space = ' ';
2189    /// let lf = '\n';
2190    /// let esc = '\x1b';
2191    ///
2192    /// assert!(!uppercase_a.is_ascii_digit());
2193    /// assert!(!uppercase_g.is_ascii_digit());
2194    /// assert!(!a.is_ascii_digit());
2195    /// assert!(!g.is_ascii_digit());
2196    /// assert!(zero.is_ascii_digit());
2197    /// assert!(!percent.is_ascii_digit());
2198    /// assert!(!space.is_ascii_digit());
2199    /// assert!(!lf.is_ascii_digit());
2200    /// assert!(!esc.is_ascii_digit());
2201    /// ```
2202    #[must_use]
2203    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2204    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2205    #[inline]
2206    pub const fn is_ascii_digit(&self) -> bool {
2207        matches!(*self, '0'..='9')
2208    }
2209
2210    /// Checks if the value is an ASCII octal digit:
2211    /// U+0030 '0' ..= U+0037 '7'.
2212    ///
2213    /// # Examples
2214    ///
2215    /// ```
2216    /// #![feature(is_ascii_octdigit)]
2217    ///
2218    /// let uppercase_a = 'A';
2219    /// let a = 'a';
2220    /// let zero = '0';
2221    /// let seven = '7';
2222    /// let nine = '9';
2223    /// let percent = '%';
2224    /// let lf = '\n';
2225    ///
2226    /// assert!(!uppercase_a.is_ascii_octdigit());
2227    /// assert!(!a.is_ascii_octdigit());
2228    /// assert!(zero.is_ascii_octdigit());
2229    /// assert!(seven.is_ascii_octdigit());
2230    /// assert!(!nine.is_ascii_octdigit());
2231    /// assert!(!percent.is_ascii_octdigit());
2232    /// assert!(!lf.is_ascii_octdigit());
2233    /// ```
2234    #[must_use]
2235    #[unstable(feature = "is_ascii_octdigit", issue = "101288")]
2236    #[inline]
2237    pub const fn is_ascii_octdigit(&self) -> bool {
2238        matches!(*self, '0'..='7')
2239    }
2240
2241    /// Checks if the value is an ASCII hexadecimal digit:
2242    ///
2243    /// - U+0030 '0' ..= U+0039 '9', or
2244    /// - U+0041 'A' ..= U+0046 'F', or
2245    /// - U+0061 'a' ..= U+0066 'f'.
2246    ///
2247    /// # Examples
2248    ///
2249    /// ```
2250    /// let uppercase_a = 'A';
2251    /// let uppercase_g = 'G';
2252    /// let a = 'a';
2253    /// let g = 'g';
2254    /// let zero = '0';
2255    /// let percent = '%';
2256    /// let space = ' ';
2257    /// let lf = '\n';
2258    /// let esc = '\x1b';
2259    ///
2260    /// assert!(uppercase_a.is_ascii_hexdigit());
2261    /// assert!(!uppercase_g.is_ascii_hexdigit());
2262    /// assert!(a.is_ascii_hexdigit());
2263    /// assert!(!g.is_ascii_hexdigit());
2264    /// assert!(zero.is_ascii_hexdigit());
2265    /// assert!(!percent.is_ascii_hexdigit());
2266    /// assert!(!space.is_ascii_hexdigit());
2267    /// assert!(!lf.is_ascii_hexdigit());
2268    /// assert!(!esc.is_ascii_hexdigit());
2269    /// ```
2270    #[must_use]
2271    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2272    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2273    #[inline]
2274    pub const fn is_ascii_hexdigit(&self) -> bool {
2275        matches!(*self, '0'..='9') | matches!(*self, 'A'..='F') | matches!(*self, 'a'..='f')
2276    }
2277
2278    /// Checks if the value is an ASCII punctuation or symbol character
2279    /// (i.e. not alphanumeric, whitespace, or control):
2280    ///
2281    /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or
2282    /// - U+003A ..= U+0040 `: ; < = > ? @`, or
2283    /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or
2284    /// - U+007B ..= U+007E `{ | } ~`
2285    ///
2286    /// # Examples
2287    ///
2288    /// ```
2289    /// let uppercase_a = 'A';
2290    /// let uppercase_g = 'G';
2291    /// let a = 'a';
2292    /// let g = 'g';
2293    /// let zero = '0';
2294    /// let percent = '%';
2295    /// let space = ' ';
2296    /// let lf = '\n';
2297    /// let esc = '\x1b';
2298    ///
2299    /// assert!(!uppercase_a.is_ascii_punctuation());
2300    /// assert!(!uppercase_g.is_ascii_punctuation());
2301    /// assert!(!a.is_ascii_punctuation());
2302    /// assert!(!g.is_ascii_punctuation());
2303    /// assert!(!zero.is_ascii_punctuation());
2304    /// assert!(percent.is_ascii_punctuation());
2305    /// assert!(!space.is_ascii_punctuation());
2306    /// assert!(!lf.is_ascii_punctuation());
2307    /// assert!(!esc.is_ascii_punctuation());
2308    /// ```
2309    #[must_use]
2310    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2311    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2312    #[inline]
2313    pub const fn is_ascii_punctuation(&self) -> bool {
2314        matches!(*self, '!'..='/')
2315            | matches!(*self, ':'..='@')
2316            | matches!(*self, '['..='`')
2317            | matches!(*self, '{'..='~')
2318    }
2319
2320    /// Checks if the value is an ASCII graphic character
2321    /// (i.e. not whitespace or control):
2322    /// U+0021 '!' ..= U+007E '~'.
2323    ///
2324    /// # Examples
2325    ///
2326    /// ```
2327    /// let uppercase_a = 'A';
2328    /// let uppercase_g = 'G';
2329    /// let a = 'a';
2330    /// let g = 'g';
2331    /// let zero = '0';
2332    /// let percent = '%';
2333    /// let space = ' ';
2334    /// let lf = '\n';
2335    /// let esc = '\x1b';
2336    ///
2337    /// assert!(uppercase_a.is_ascii_graphic());
2338    /// assert!(uppercase_g.is_ascii_graphic());
2339    /// assert!(a.is_ascii_graphic());
2340    /// assert!(g.is_ascii_graphic());
2341    /// assert!(zero.is_ascii_graphic());
2342    /// assert!(percent.is_ascii_graphic());
2343    /// assert!(!space.is_ascii_graphic());
2344    /// assert!(!lf.is_ascii_graphic());
2345    /// assert!(!esc.is_ascii_graphic());
2346    /// ```
2347    #[must_use]
2348    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2349    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2350    #[inline]
2351    pub const fn is_ascii_graphic(&self) -> bool {
2352        matches!(*self, '!'..='~')
2353    }
2354
2355    /// Checks if the value is an ASCII whitespace character:
2356    /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
2357    /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
2358    ///
2359    /// **Warning:** Because the list above excludes U+000B VERTICAL TAB,
2360    /// `c.is_ascii_whitespace()` is **not** equivalent to `c.is_ascii() && c.is_whitespace()`.
2361    ///
2362    /// Rust uses the WhatWG Infra Standard's [definition of ASCII
2363    /// whitespace][infra-aw]. There are several other definitions in
2364    /// wide use. For instance, [the POSIX locale][pct] includes
2365    /// U+000B VERTICAL TAB as well as all the above characters,
2366    /// but—from the very same specification—[the default rule for
2367    /// "field splitting" in the Bourne shell][bfs] considers *only*
2368    /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
2369    ///
2370    /// If you are writing a program that will process an existing
2371    /// file format, check what that format's definition of whitespace is
2372    /// before using this function.
2373    ///
2374    /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
2375    /// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
2376    /// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
2377    ///
2378    /// # Examples
2379    ///
2380    /// ```
2381    /// let uppercase_a = 'A';
2382    /// let uppercase_g = 'G';
2383    /// let a = 'a';
2384    /// let g = 'g';
2385    /// let zero = '0';
2386    /// let percent = '%';
2387    /// let space = ' ';
2388    /// let lf = '\n';
2389    /// let esc = '\x1b';
2390    ///
2391    /// assert!(!uppercase_a.is_ascii_whitespace());
2392    /// assert!(!uppercase_g.is_ascii_whitespace());
2393    /// assert!(!a.is_ascii_whitespace());
2394    /// assert!(!g.is_ascii_whitespace());
2395    /// assert!(!zero.is_ascii_whitespace());
2396    /// assert!(!percent.is_ascii_whitespace());
2397    /// assert!(space.is_ascii_whitespace());
2398    /// assert!(lf.is_ascii_whitespace());
2399    /// assert!(!esc.is_ascii_whitespace());
2400    /// ```
2401    #[must_use]
2402    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2403    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2404    #[inline]
2405    #[ferrocene::prevalidated]
2406    pub const fn is_ascii_whitespace(&self) -> bool {
2407        matches!(*self, '\t' | '\n' | '\x0C' | '\r' | ' ')
2408    }
2409
2410    /// Checks if the value is an ASCII control character:
2411    /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE.
2412    /// Note that most ASCII whitespace characters are control
2413    /// characters, but SPACE is not.
2414    ///
2415    /// # Examples
2416    ///
2417    /// ```
2418    /// let uppercase_a = 'A';
2419    /// let uppercase_g = 'G';
2420    /// let a = 'a';
2421    /// let g = 'g';
2422    /// let zero = '0';
2423    /// let percent = '%';
2424    /// let space = ' ';
2425    /// let lf = '\n';
2426    /// let esc = '\x1b';
2427    ///
2428    /// assert!(!uppercase_a.is_ascii_control());
2429    /// assert!(!uppercase_g.is_ascii_control());
2430    /// assert!(!a.is_ascii_control());
2431    /// assert!(!g.is_ascii_control());
2432    /// assert!(!zero.is_ascii_control());
2433    /// assert!(!percent.is_ascii_control());
2434    /// assert!(!space.is_ascii_control());
2435    /// assert!(lf.is_ascii_control());
2436    /// assert!(esc.is_ascii_control());
2437    /// ```
2438    #[must_use]
2439    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2440    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2441    #[inline]
2442    pub const fn is_ascii_control(&self) -> bool {
2443        matches!(*self, '\0'..='\x1F' | '\x7F')
2444    }
2445}
2446
2447#[ferrocene::prevalidated]
2448pub(crate) struct EscapeDebugExtArgs {
2449    /// Escape Grapheme Extender codepoints?
2450    pub(crate) escape_grapheme_extender: bool,
2451
2452    /// Escape single quotes?
2453    pub(crate) escape_single_quote: bool,
2454
2455    /// Escape double quotes?
2456    pub(crate) escape_double_quote: bool,
2457}
2458
2459impl EscapeDebugExtArgs {
2460    pub(crate) const ESCAPE_ALL: Self = Self {
2461        escape_grapheme_extender: true,
2462        escape_single_quote: true,
2463        escape_double_quote: true,
2464    };
2465}
2466
2467#[inline]
2468#[must_use]
2469#[ferrocene::prevalidated]
2470const fn len_utf8(code: u32) -> usize {
2471    match code {
2472        ..MAX_ONE_B => 1,
2473        ..MAX_TWO_B => 2,
2474        ..MAX_THREE_B => 3,
2475        _ => 4,
2476    }
2477}
2478
/// Returns the number of `u16` units (1 or 2) used to encode `code` as UTF-16.
#[inline]
#[must_use]
const fn len_utf16(code: u32) -> usize {
    // Values that fit in 16 bits take a single unit; everything above
    // U+FFFF takes two.
    if code > 0xFFFF { 2 } else { 1 }
}
2484
2485/// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,
2486/// and then returns the subslice of the buffer that contains the encoded character.
2487///
2488/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
2489/// (Creating a `char` in the surrogate range is UB.)
2490/// The result is valid [generalized UTF-8] but not valid UTF-8.
2491///
2492/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
2493///
2494/// # Panics
2495///
2496/// Panics if the buffer is not large enough.
2497/// A buffer of length four is large enough to encode any `char`.
2498#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2499#[doc(hidden)]
2500#[inline]
2501#[ferrocene::prevalidated]
2502pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
2503    let len = len_utf8(code);
2504    if dst.len() < len {
2505        const_panic!(
2506            "encode_utf8: buffer does not have enough bytes to encode code point",
2507            "encode_utf8: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
2508            code: u32 = code,
2509            len: usize = len,
2510            dst_len: usize = dst.len(),
2511        );
2512    }
2513
2514    // SAFETY: `dst` is checked to be at least the length needed to encode the codepoint.
2515    unsafe { encode_utf8_raw_unchecked(code, dst.as_mut_ptr()) };
2516
2517    // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
2518    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
2519}
2520
2521/// Encodes a raw `u32` value as UTF-8 into the byte buffer pointed to by `dst`.
2522///
2523/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
2524/// (Creating a `char` in the surrogate range is UB.)
2525/// The result is valid [generalized UTF-8] but not valid UTF-8.
2526///
2527/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
2528///
2529/// # Safety
2530///
2531/// The behavior is undefined if the buffer pointed to by `dst` is not
2532/// large enough to hold the encoded codepoint. A buffer of length four
2533/// is large enough to encode any `char`.
2534///
2535/// For a safe version of this function, see the [`encode_utf8_raw`] function.
2536#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2537#[doc(hidden)]
2538#[inline]
2539#[ferrocene::prevalidated]
2540pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) {
2541    let len = len_utf8(code);
2542    // SAFETY: The caller must guarantee that the buffer pointed to by `dst`
2543    // is at least `len` bytes long.
2544    unsafe {
2545        if len == 1 {
2546            *dst = code as u8;
2547            return;
2548        }
2549
2550        let last1 = (code >> 0 & 0x3F) as u8 | TAG_CONT;
2551        let last2 = (code >> 6 & 0x3F) as u8 | TAG_CONT;
2552        let last3 = (code >> 12 & 0x3F) as u8 | TAG_CONT;
2553        let last4 = (code >> 18 & 0x3F) as u8 | TAG_FOUR_B;
2554
2555        if len == 2 {
2556            *dst = last2 | TAG_TWO_B;
2557            *dst.add(1) = last1;
2558            return;
2559        }
2560
2561        if len == 3 {
2562            *dst = last3 | TAG_THREE_B;
2563            *dst.add(1) = last2;
2564            *dst.add(2) = last1;
2565            return;
2566        }
2567
2568        *dst = last4;
2569        *dst.add(1) = last3;
2570        *dst.add(2) = last2;
2571        *dst.add(3) = last1;
2572    }
2573}
2574
2575/// Encodes a raw `u32` value as native endian UTF-16 into the provided `u16` buffer,
2576/// and then returns the subslice of the buffer that contains the encoded character.
2577///
2578/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
2579/// (Creating a `char` in the surrogate range is UB.)
2580///
2581/// # Panics
2582///
2583/// Panics if the buffer is not large enough.
2584/// A buffer of length 2 is large enough to encode any `char`.
2585#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2586#[doc(hidden)]
2587#[inline]
2588pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
2589    let len = len_utf16(code);
2590    match (len, &mut *dst) {
2591        (1, [a, ..]) => {
2592            *a = code as u16;
2593        }
2594        (2, [a, b, ..]) => {
2595            code -= 0x1_0000;
2596            *a = (code >> 10) as u16 | 0xD800;
2597            *b = (code & 0x3FF) as u16 | 0xDC00;
2598        }
2599        _ => {
2600            const_panic!(
2601                "encode_utf16: buffer does not have enough bytes to encode code point",
2602                "encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
2603                code: u32 = code,
2604                len: usize = len,
2605                dst_len: usize = dst.len(),
2606            )
2607        }
2608    };
2609    // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
2610    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
2611}