Skip to main content

core/char/
mod.rs

1//! Utilities for the `char` primitive type.
2//!
3//! *[See also the `char` primitive type](primitive@char).*
4//!
5//! The `char` type represents a single character. More specifically, since
6//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
7//! scalar value]', which is similar to, but not the same as, a '[Unicode code
8//! point]'.
9//!
10//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
11//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
12//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
15//!
16//! This module is the home of the iterator implementations for the iterators
17//! implemented on `char`, as well as some useful constants and conversion
18//! functions that convert various types to `char`.
19
20#![allow(non_snake_case)]
21#![stable(feature = "rust1", since = "1.0.0")]
22
23mod convert;
24mod decode;
25mod methods;
26
27// stable re-exports
28#[rustfmt::skip]
29#[stable(feature = "try_from", since = "1.34.0")]
30pub use self::convert::CharTryFromError;
31#[stable(feature = "char_from_str", since = "1.20.0")]
32pub use self::convert::ParseCharError;
33#[stable(feature = "decode_utf16", since = "1.9.0")]
34pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
35
36// perma-unstable re-exports
37#[rustfmt::skip]
38#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39pub use self::methods::encode_utf16_raw; // perma-unstable
40#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
41pub use self::methods::{encode_utf8_raw, encode_utf8_raw_unchecked}; // perma-unstable
42
43#[rustfmt::skip]
44use crate::ascii;
45pub(crate) use self::methods::EscapeDebugExtArgs;
46use crate::error::Error;
47use crate::escape::{AlwaysEscaped, EscapeIterInner, MaybeEscaped};
48use crate::fmt::{self, Write};
49use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
50use crate::num::NonZero;
51
// UTF-8 ranges and tags for encoding characters
//
// The `TAG_*` constants are the fixed high-order bits of a UTF-8 byte, and the
// `MAX_*_B` constants are code point thresholds: in UTF-8, a code point below
// `MAX_ONE_B` takes one byte, below `MAX_TWO_B` takes two bytes, and below
// `MAX_THREE_B` takes three bytes.

// High bits `10` of a continuation byte.
const TAG_CONT: u8 = 0b1000_0000;
// High bits `110` of the first byte of a two-byte sequence.
const TAG_TWO_B: u8 = 0b1100_0000;
// High bits `1110` of the first byte of a three-byte sequence.
const TAG_THREE_B: u8 = 0b1110_0000;
// High bits `11110` of the first byte of a four-byte sequence.
const TAG_FOUR_B: u8 = 0b1111_0000;
// First code point that no longer fits in one UTF-8 byte.
const MAX_ONE_B: u32 = 0x80;
// First code point that no longer fits in two UTF-8 bytes.
const MAX_TWO_B: u32 = 0x800;
// First code point that no longer fits in three UTF-8 bytes.
const MAX_THREE_B: u32 = 0x10000;
60
61/*
62    Lu  Uppercase_Letter        an uppercase letter
63    Ll  Lowercase_Letter        a lowercase letter
64    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
65    Lm  Modifier_Letter         a modifier letter
66    Lo  Other_Letter            other letters, including syllables and ideographs
67    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
68    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
69    Me  Enclosing_Mark          an enclosing combining mark
70    Nd  Decimal_Number          a decimal digit
71    Nl  Letter_Number           a letterlike numeric character
72    No  Other_Number            a numeric character of other type
73    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
74    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
75    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
76    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
77    Pi  Initial_Punctuation     an initial quotation mark
78    Pf  Final_Punctuation       a final quotation mark
79    Po  Other_Punctuation       a punctuation mark of other type
80    Sm  Math_Symbol             a symbol of primarily mathematical use
81    Sc  Currency_Symbol         a currency sign
82    Sk  Modifier_Symbol         a non-letterlike modifier symbol
83    So  Other_Symbol            a symbol of other type
84    Zs  Space_Separator         a space character (of various non-zero widths)
85    Zl  Line_Separator          U+2028 LINE SEPARATOR only
86    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
87    Cc  Control                 a C0 or C1 control code
88    Cf  Format                  a format control character
89    Cs  Surrogate               a surrogate code point
90    Co  Private_Use             a private-use character
91    Cn  Unassigned              a reserved unassigned code point or a noncharacter
92*/
93
/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
///
/// This module-level constant is defined as an alias of the associated
/// constant [`char::MAX`].
#[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = char::MAX;

/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
/// UTF-8 encoding.
///
/// This module-level constant is defined as an alias of the associated
/// constant `char::MAX_LEN_UTF8`.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;

/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
/// to UTF-16 encoding.
///
/// This module-level constant is defined as an alias of the associated
/// constant `char::MAX_LEN_UTF16`.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;

/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
///
/// This module-level constant is defined as an alias of the associated
/// constant [`char::REPLACEMENT_CHARACTER`].
#[stable(feature = "decode_utf16", since = "1.9.0")]
pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;

/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
///
/// The value is a three-element tuple of `u8`s and is defined as an alias of
/// the associated constant [`char::UNICODE_VERSION`].
#[stable(feature = "unicode_version", since = "1.45.0")]
pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
117
/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
///
/// `iter` may be anything convertible into an iterator of `u16` values. This
/// free function only forwards to the implementation in the private `decode`
/// module and returns the resulting [`DecodeUtf16`] adapter.
#[stable(feature = "decode_utf16", since = "1.9.0")]
#[inline]
pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
    // Forward unchanged to the implementation in the `decode` submodule.
    self::decode::decode_utf16(iter)
}
125
/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
///
/// The conversion is fallible, hence the `Option<char>` return type. This free
/// function only forwards to the implementation in the private `convert`
/// module; see [`char::from_u32`] for the full contract.
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
#[must_use]
#[inline]
pub const fn from_u32(i: u32) -> Option<char> {
    // Forward unchanged to the implementation in the `convert` submodule.
    self::convert::from_u32(i)
}
134
/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`]
/// instead.
///
/// This free function only forwards to the implementation in the private
/// `convert` module.
///
/// # Safety
///
/// No validity check is performed here: the caller is responsible for
/// upholding the safety contract of the underlying conversion. See
/// [`char::from_u32_unchecked`] for the requirements on `i`.
#[stable(feature = "char_from_unchecked", since = "1.5.0")]
#[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
#[must_use]
#[inline]
pub const unsafe fn from_u32_unchecked(i: u32) -> char {
    // SAFETY: the safety contract must be upheld by the caller.
    unsafe { self::convert::from_u32_unchecked(i) }
}
145
/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
///
/// The conversion is fallible, hence the `Option<char>` return type. This free
/// function only forwards `num` and `radix` to the implementation in the
/// private `convert` module; see [`char::from_digit`] for the full contract.
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
#[must_use]
#[inline]
pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
    // Forward unchanged to the implementation in the `convert` submodule.
    self::convert::from_digit(num, radix)
}
154
155/// Returns an iterator that yields the hexadecimal Unicode escape of a
156/// character, as `char`s.
157///
158/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
159/// its documentation for more.
160///
161/// [`escape_unicode`]: char::escape_unicode
162#[derive(Clone, Debug)]
163#[stable(feature = "rust1", since = "1.0.0")]
164pub struct EscapeUnicode(EscapeIterInner<10, AlwaysEscaped>);
165
166impl EscapeUnicode {
167    #[inline]
168    const fn new(c: char) -> Self {
169        Self(EscapeIterInner::unicode(c))
170    }
171}
172
173#[stable(feature = "rust1", since = "1.0.0")]
174impl Iterator for EscapeUnicode {
175    type Item = char;
176
177    #[inline]
178    fn next(&mut self) -> Option<char> {
179        self.0.next().map(char::from)
180    }
181
182    #[inline]
183    fn size_hint(&self) -> (usize, Option<usize>) {
184        let n = self.0.len();
185        (n, Some(n))
186    }
187
188    #[inline]
189    fn count(self) -> usize {
190        self.0.len()
191    }
192
193    #[inline]
194    fn last(mut self) -> Option<char> {
195        self.0.next_back().map(char::from)
196    }
197
198    #[inline]
199    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
200        self.0.advance_by(n)
201    }
202}
203
204#[stable(feature = "exact_size_escape", since = "1.11.0")]
205impl ExactSizeIterator for EscapeUnicode {
206    #[inline]
207    fn len(&self) -> usize {
208        self.0.len()
209    }
210}
211
212#[stable(feature = "fused", since = "1.26.0")]
213impl FusedIterator for EscapeUnicode {}
214
215#[stable(feature = "char_struct_display", since = "1.16.0")]
216impl fmt::Display for EscapeUnicode {
217    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
218        fmt::Display::fmt(&self.0, f)
219    }
220}
221
222/// An iterator that yields the literal escape code of a `char`.
223///
224/// This `struct` is created by the [`escape_default`] method on [`char`]. See
225/// its documentation for more.
226///
227/// [`escape_default`]: char::escape_default
228#[derive(Clone, Debug)]
229#[stable(feature = "rust1", since = "1.0.0")]
230pub struct EscapeDefault(EscapeIterInner<10, AlwaysEscaped>);
231
232impl EscapeDefault {
233    #[inline]
234    const fn printable(c: ascii::Char) -> Self {
235        Self(EscapeIterInner::ascii(c.to_u8()))
236    }
237
238    #[inline]
239    const fn backslash(c: ascii::Char) -> Self {
240        Self(EscapeIterInner::backslash(c))
241    }
242
243    #[inline]
244    const fn unicode(c: char) -> Self {
245        Self(EscapeIterInner::unicode(c))
246    }
247}
248
249#[stable(feature = "rust1", since = "1.0.0")]
250impl Iterator for EscapeDefault {
251    type Item = char;
252
253    #[inline]
254    fn next(&mut self) -> Option<char> {
255        self.0.next().map(char::from)
256    }
257
258    #[inline]
259    fn size_hint(&self) -> (usize, Option<usize>) {
260        let n = self.0.len();
261        (n, Some(n))
262    }
263
264    #[inline]
265    fn count(self) -> usize {
266        self.0.len()
267    }
268
269    #[inline]
270    fn last(mut self) -> Option<char> {
271        self.0.next_back().map(char::from)
272    }
273
274    #[inline]
275    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
276        self.0.advance_by(n)
277    }
278}
279
280#[stable(feature = "exact_size_escape", since = "1.11.0")]
281impl ExactSizeIterator for EscapeDefault {
282    #[inline]
283    fn len(&self) -> usize {
284        self.0.len()
285    }
286}
287
288#[stable(feature = "fused", since = "1.26.0")]
289impl FusedIterator for EscapeDefault {}
290
291#[stable(feature = "char_struct_display", since = "1.16.0")]
292impl fmt::Display for EscapeDefault {
293    #[inline]
294    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
295        fmt::Display::fmt(&self.0, f)
296    }
297}
298
299/// An iterator that yields the literal escape code of a `char`.
300///
301/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
302/// documentation for more.
303///
304/// [`escape_debug`]: char::escape_debug
305#[stable(feature = "char_escape_debug", since = "1.20.0")]
306#[derive(Clone, Debug)]
307#[ferrocene::prevalidated]
308pub struct EscapeDebug(EscapeIterInner<10, MaybeEscaped>);
309
310impl EscapeDebug {
311    #[inline]
312    #[ferrocene::prevalidated]
313    const fn printable(chr: char) -> Self {
314        Self(EscapeIterInner::printable(chr))
315    }
316
317    #[inline]
318    #[ferrocene::prevalidated]
319    const fn backslash(c: ascii::Char) -> Self {
320        Self(EscapeIterInner::backslash(c))
321    }
322
323    #[inline]
324    #[ferrocene::prevalidated]
325    const fn unicode(c: char) -> Self {
326        Self(EscapeIterInner::unicode(c))
327    }
328}
329
330#[stable(feature = "char_escape_debug", since = "1.20.0")]
331impl Iterator for EscapeDebug {
332    type Item = char;
333
334    #[inline]
335    #[ferrocene::prevalidated]
336    fn next(&mut self) -> Option<char> {
337        self.0.next()
338    }
339
340    #[inline]
341    #[ferrocene::prevalidated]
342    fn size_hint(&self) -> (usize, Option<usize>) {
343        let n = self.len();
344        (n, Some(n))
345    }
346
347    #[inline]
348    #[ferrocene::prevalidated]
349    fn count(self) -> usize {
350        self.len()
351    }
352}
353
354#[stable(feature = "char_escape_debug", since = "1.20.0")]
355impl ExactSizeIterator for EscapeDebug {
356    #[ferrocene::prevalidated]
357    fn len(&self) -> usize {
358        self.0.len()
359    }
360}
361
362#[stable(feature = "fused", since = "1.26.0")]
363impl FusedIterator for EscapeDebug {}
364
365#[stable(feature = "char_escape_debug", since = "1.20.0")]
366impl fmt::Display for EscapeDebug {
367    #[inline]
368    #[ferrocene::prevalidated]
369    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
370        fmt::Display::fmt(&self.0, f)
371    }
372}
373
// Generates a public newtype around `CaseMappingIter` together with all of its
// iterator-related trait impls, each of which forwards to the wrapped value.
//
// The invocation supplies, in this exact order:
//   1. the stability attribute for the struct and its `Iterator` impl,
//   2. the stability attribute for the `DoubleEndedIterator` impl,
//   3. the stability attribute for the `FusedIterator` impl,
//   4. the stability attribute for the `ExactSizeIterator` impl,
//   5. the stability attribute for the `Display` impl,
//   6. any further attributes (e.g. doc comments) to place on the struct,
//   7. the name of the struct to generate.
macro_rules! casemappingiter_impls {
    (
        #[$stab:meta]
        #[$dendstab:meta]
        #[$fusedstab:meta]
        #[$exactstab:meta]
        #[$displaystab:meta]
        $(#[$attr:meta])*
        $ITER_NAME:ident
    ) => {
        // The generated wrapper type itself.
        $(#[$attr])*
        #[$stab]
        #[derive(Debug, Clone)]
        pub struct $ITER_NAME(CaseMappingIter);

        // Every method below forwards to the wrapped `CaseMappingIter`.
        #[$stab]
        impl Iterator for $ITER_NAME {
            type Item = char;
            fn next(&mut self) -> Option<char> {
                self.0.next()
            }

            fn size_hint(&self) -> (usize, Option<usize>) {
                self.0.size_hint()
            }

            fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
            where
                Fold: FnMut(Acc, Self::Item) -> Acc,
            {
                self.0.fold(init, fold)
            }

            fn count(self) -> usize {
                self.0.count()
            }

            fn last(self) -> Option<Self::Item> {
                self.0.last()
            }

            fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_by(n)
            }

            unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
                // SAFETY: just forwarding requirements to caller
                unsafe { self.0.__iterator_get_unchecked(idx) }
            }
        }

        // Back-to-front iteration, again forwarded to the wrapped iterator.
        #[$dendstab]
        impl DoubleEndedIterator for $ITER_NAME {
            fn next_back(&mut self) -> Option<char> {
                self.0.next_back()
            }

            fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
            where
                Fold: FnMut(Acc, Self::Item) -> Acc,
            {
                self.0.rfold(init, rfold)
            }

            fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_back_by(n)
            }
        }

        #[$fusedstab]
        impl FusedIterator for $ITER_NAME {}

        // Exact length queries are forwarded to the wrapped iterator.
        #[$exactstab]
        impl ExactSizeIterator for $ITER_NAME {
            fn len(&self) -> usize {
                self.0.len()
            }

            fn is_empty(&self) -> bool {
                self.0.is_empty()
            }
        }

        // SAFETY: forwards to inner `array::IntoIter`
        #[unstable(feature = "trusted_len", issue = "37572")]
        unsafe impl TrustedLen for $ITER_NAME {}

        // SAFETY: forwards to inner `array::IntoIter`
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccessNoCoerce for $ITER_NAME {
            const MAY_HAVE_SIDE_EFFECT: bool = false;
        }

        // SAFETY: this iter has no subtypes/supertypes
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccess for $ITER_NAME {}

        // Formatting is delegated to the `Display` impl of `CaseMappingIter`.
        #[$displaystab]
        impl fmt::Display for $ITER_NAME {
            #[inline]
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }
    }
}
482
// Generates `ToUppercase`. The five leading stability attributes apply, in
// order, to: the struct and its `Iterator` impl, `DoubleEndedIterator`,
// `FusedIterator`, `ExactSizeIterator`, and `Display`. The doc comment that
// follows them ends up on the generated struct.
casemappingiter_impls! {
    #[stable(feature = "rust1", since = "1.0.0")]
    #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
    #[stable(feature = "fused", since = "1.26.0")]
    #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
    #[stable(feature = "char_struct_display", since = "1.16.0")]
    /// Returns an iterator that yields the uppercase equivalent of a `char`.
    ///
    /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
    /// its documentation for more.
    ///
    /// [`to_uppercase`]: char::to_uppercase
    ToUppercase
}
497
// Generates `ToTitlecase`. The macro takes one stability attribute per
// generated item (struct + `Iterator`, `DoubleEndedIterator`, `FusedIterator`,
// `ExactSizeIterator`, `Display`); here all five are the same unstable
// `titlecase` gate, so the attribute is deliberately repeated five times.
casemappingiter_impls! {
    #[unstable(feature = "titlecase", issue = "153892")]
    #[unstable(feature = "titlecase", issue = "153892")]
    #[unstable(feature = "titlecase", issue = "153892")]
    #[unstable(feature = "titlecase", issue = "153892")]
    #[unstable(feature = "titlecase", issue = "153892")]
    /// Returns an iterator that yields the titlecase equivalent of a `char`.
    ///
    /// This `struct` is created by the [`to_titlecase`] method on [`char`]. See
    /// its documentation for more.
    ///
    /// [`to_titlecase`]: char::to_titlecase
    ToTitlecase
}
512
// Generates `ToLowercase`. The five leading stability attributes apply, in
// order, to: the struct and its `Iterator` impl, `DoubleEndedIterator`,
// `FusedIterator`, `ExactSizeIterator`, and `Display`. The doc comment that
// follows them ends up on the generated struct.
casemappingiter_impls! {
    #[stable(feature = "rust1", since = "1.0.0")]
    #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
    #[stable(feature = "fused", since = "1.26.0")]
    #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
    #[stable(feature = "char_struct_display", since = "1.16.0")]
    /// Returns an iterator that yields the lowercase equivalent of a `char`.
    ///
    /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
    /// its documentation for more.
    ///
    /// [`to_lowercase`]: char::to_lowercase
    ToLowercase
}
527
528#[derive(Debug, Clone)]
529struct CaseMappingIter(core::array::IntoIter<char, 3>);
530
531impl CaseMappingIter {
532    #[inline]
533    fn new(chars: [char; 3]) -> CaseMappingIter {
534        let mut iter = chars.into_iter();
535        if chars[2] == '\0' {
536            iter.next_back();
537            if chars[1] == '\0' {
538                iter.next_back();
539
540                // Deliberately don't check `chars[0]`,
541                // as '\0' lowercases to itself
542            }
543        }
544        CaseMappingIter(iter)
545    }
546}
547
548impl Iterator for CaseMappingIter {
549    type Item = char;
550
551    fn next(&mut self) -> Option<char> {
552        self.0.next()
553    }
554
555    fn size_hint(&self) -> (usize, Option<usize>) {
556        self.0.size_hint()
557    }
558
559    fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
560    where
561        Fold: FnMut(Acc, Self::Item) -> Acc,
562    {
563        self.0.fold(init, fold)
564    }
565
566    fn count(self) -> usize {
567        self.0.count()
568    }
569
570    fn last(self) -> Option<Self::Item> {
571        self.0.last()
572    }
573
574    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
575        self.0.advance_by(n)
576    }
577
578    unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
579        // SAFETY: just forwarding requirements to caller
580        unsafe { self.0.__iterator_get_unchecked(idx) }
581    }
582}
583
584impl DoubleEndedIterator for CaseMappingIter {
585    fn next_back(&mut self) -> Option<char> {
586        self.0.next_back()
587    }
588
589    fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
590    where
591        Fold: FnMut(Acc, Self::Item) -> Acc,
592    {
593        self.0.rfold(init, rfold)
594    }
595
596    fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
597        self.0.advance_back_by(n)
598    }
599}
600
601impl ExactSizeIterator for CaseMappingIter {
602    fn len(&self) -> usize {
603        self.0.len()
604    }
605
606    fn is_empty(&self) -> bool {
607        self.0.is_empty()
608    }
609}
610
611impl FusedIterator for CaseMappingIter {}
612
613// SAFETY: forwards to inner `array::IntoIter`
614unsafe impl TrustedLen for CaseMappingIter {}
615
616// SAFETY: forwards to inner `array::IntoIter`
617unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter {
618    const MAY_HAVE_SIDE_EFFECT: bool = false;
619}
620
621// SAFETY: `CaseMappingIter` has no subtypes/supertypes
622unsafe impl TrustedRandomAccess for CaseMappingIter {}
623
624impl fmt::Display for CaseMappingIter {
625    #[inline]
626    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
627        for c in self.0.clone() {
628            f.write_char(c)?;
629        }
630        Ok(())
631    }
632}
633
634/// The error type returned when a checked char conversion fails.
635#[stable(feature = "u8_from_char", since = "1.59.0")]
636#[derive(Debug, Copy, Clone, PartialEq, Eq)]
637pub struct TryFromCharError(pub(crate) ());
638
639#[stable(feature = "u8_from_char", since = "1.59.0")]
640impl fmt::Display for TryFromCharError {
641    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
642        "unicode code point out of range".fmt(fmt)
643    }
644}
645
646#[stable(feature = "u8_from_char", since = "1.59.0")]
647impl Error for TryFromCharError {}
648
/// The case of a cased character,
/// as returned by [`char::case`].
///
/// Titlecase characters conceptually are composed of an uppercase portion
/// followed by a lowercase portion.
/// The variant discriminants represent this:
/// the most significant bit represents whether the case
/// conceptually starts as uppercase, while the least significant bit
/// represents whether it conceptually ends as uppercase.
///
/// Only the low two bits of the discriminant are used. Because `PartialOrd`
/// and `Ord` are derived, the variants compare in declaration order:
/// `Lower < Title < Upper`.
#[unstable(feature = "titlecase", issue = "153892")]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum CharCase {
    /// Lowercase. Corresponds to the `Lowercase` Unicode property.
    ///
    /// Discriminant `0b00`: neither starts nor ends as uppercase.
    Lower = 0b00,
    /// Titlecase. Corresponds to the `Titlecase_Letter` Unicode general category.
    ///
    /// Discriminant `0b10`: starts as uppercase, ends as lowercase.
    Title = 0b10,
    /// Uppercase. Corresponds to the `Uppercase` Unicode property.
    ///
    /// Discriminant `0b11`: both starts and ends as uppercase.
    Upper = 0b11,
}