core/char/
convert.rs

//! Character conversions.
2
3#[cfg(not(feature = "ferrocene_subset"))]
4use crate::char::TryFromCharError;
5#[cfg(not(feature = "ferrocene_subset"))]
6use crate::error::Error;
7#[cfg(not(feature = "ferrocene_subset"))]
8use crate::fmt;
9use crate::mem::transmute;
10#[cfg(not(feature = "ferrocene_subset"))]
11use crate::str::FromStr;
12use crate::ub_checks::assert_unsafe_precondition;
13
14/// Converts a `u32` to a `char`. See [`char::from_u32`].
15#[cfg(not(feature = "ferrocene_subset"))]
16#[must_use]
17#[inline]
18pub(super) const fn from_u32(i: u32) -> Option<char> {
19    // FIXME(const-hack): once Result::ok is const fn, use it here
20    match char_try_from_u32(i) {
21        Ok(c) => Some(c),
22        Err(_) => None,
23    }
24}
25
/// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`].
///
/// # Safety
///
/// The caller must guarantee that `i` is a valid `char` value, i.e. one for which
/// `char_try_from_u32(i)` returns `Ok`.
#[inline]
#[must_use]
#[allow(unnecessary_transmutes)]
#[track_caller]
pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
    // SAFETY: the caller must guarantee that `i` is a valid char value.
    unsafe {
        // Assert the caller's promise (under the `check_language_ub` condition) before the
        // bits are reinterpreted as a `char`.
        assert_unsafe_precondition!(
            check_language_ub,
            "invalid value for `char`",
            (i: u32 = i) => char_try_from_u32(i).is_ok()
        );
        transmute(i)
    }
}
42
43#[cfg(not(feature = "ferrocene_subset"))]
44#[stable(feature = "char_convert", since = "1.13.0")]
45#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
46impl const From<char> for u32 {
47    /// Converts a [`char`] into a [`u32`].
48    ///
49    /// # Examples
50    ///
51    /// ```
52    /// let c = 'c';
53    /// let u = u32::from(c);
54    ///
55    /// assert!(4 == size_of_val(&u))
56    /// ```
57    #[inline]
58    fn from(c: char) -> Self {
59        c as u32
60    }
61}
62
63#[cfg(not(feature = "ferrocene_subset"))]
64#[stable(feature = "more_char_conversions", since = "1.51.0")]
65#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
66impl const From<char> for u64 {
67    /// Converts a [`char`] into a [`u64`].
68    ///
69    /// # Examples
70    ///
71    /// ```
72    /// let c = '👤';
73    /// let u = u64::from(c);
74    ///
75    /// assert!(8 == size_of_val(&u))
76    /// ```
77    #[inline]
78    fn from(c: char) -> Self {
79        // The char is casted to the value of the code point, then zero-extended to 64 bit.
80        // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
81        c as u64
82    }
83}
84
85#[cfg(not(feature = "ferrocene_subset"))]
86#[stable(feature = "more_char_conversions", since = "1.51.0")]
87#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
88impl const From<char> for u128 {
89    /// Converts a [`char`] into a [`u128`].
90    ///
91    /// # Examples
92    ///
93    /// ```
94    /// let c = 'âš™';
95    /// let u = u128::from(c);
96    ///
97    /// assert!(16 == size_of_val(&u))
98    /// ```
99    #[inline]
100    fn from(c: char) -> Self {
101        // The char is casted to the value of the code point, then zero-extended to 128 bit.
102        // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
103        c as u128
104    }
105}
106
107/// Maps a `char` with a code point from U+0000 to U+00FF (inclusive) to a byte in `0x00..=0xFF` with
108/// the same value, failing if the code point is greater than U+00FF.
109///
110/// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding.
111#[cfg(not(feature = "ferrocene_subset"))]
112#[stable(feature = "u8_from_char", since = "1.59.0")]
113#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
114impl const TryFrom<char> for u8 {
115    type Error = TryFromCharError;
116
117    /// Tries to convert a [`char`] into a [`u8`].
118    ///
119    /// # Examples
120    ///
121    /// ```
122    /// let a = 'ÿ'; // U+00FF
123    /// let b = 'Ä€'; // U+0100
124    ///
125    /// assert_eq!(u8::try_from(a), Ok(0xFF_u8));
126    /// assert!(u8::try_from(b).is_err());
127    /// ```
128    #[inline]
129    fn try_from(c: char) -> Result<u8, Self::Error> {
130        // FIXME(const-hack): this should use map_err instead
131        match u8::try_from(u32::from(c)) {
132            Ok(b) => Ok(b),
133            Err(_) => Err(TryFromCharError(())),
134        }
135    }
136}
137
138/// Maps a `char` with a code point from U+0000 to U+FFFF (inclusive) to a `u16` in `0x0000..=0xFFFF`
139/// with the same value, failing if the code point is greater than U+FFFF.
140///
141/// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003.
142#[cfg(not(feature = "ferrocene_subset"))]
143#[stable(feature = "u16_from_char", since = "1.74.0")]
144#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
145impl const TryFrom<char> for u16 {
146    type Error = TryFromCharError;
147
148    /// Tries to convert a [`char`] into a [`u16`].
149    ///
150    /// # Examples
151    ///
152    /// ```
153    /// let trans_rights = 'âš§'; // U+26A7
154    /// let ninjas = '🥷'; // U+1F977
155    ///
156    /// assert_eq!(u16::try_from(trans_rights), Ok(0x26A7_u16));
157    /// assert!(u16::try_from(ninjas).is_err());
158    /// ```
159    #[inline]
160    fn try_from(c: char) -> Result<u16, Self::Error> {
161        // FIXME(const-hack): this should use map_err instead
162        match u16::try_from(u32::from(c)) {
163            Ok(x) => Ok(x),
164            Err(_) => Err(TryFromCharError(())),
165        }
166    }
167}
168
169/// Maps a `char` with a code point from U+0000 to U+10FFFF (inclusive) to a `usize` in
170/// `0x0000..=0x10FFFF` with the same value, failing if the final value is unrepresentable by
171/// `usize`.
172///
173/// Generally speaking, this conversion can be seen as obtaining the character's corresponding
174/// UTF-32 code point to the extent representable by pointer addresses.
175#[cfg(not(feature = "ferrocene_subset"))]
176#[stable(feature = "usize_try_from_char", since = "CURRENT_RUSTC_VERSION")]
177#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
178impl const TryFrom<char> for usize {
179    type Error = TryFromCharError;
180
181    /// Tries to convert a [`char`] into a [`usize`].
182    ///
183    /// # Examples
184    ///
185    /// ```
186    /// let a = '\u{FFFF}'; // Always succeeds.
187    /// let b = '\u{10FFFF}'; // Conditionally succeeds.
188    ///
189    /// assert_eq!(usize::try_from(a), Ok(0xFFFF));
190    ///
191    /// if size_of::<usize>() >= size_of::<u32>() {
192    ///     assert_eq!(usize::try_from(b), Ok(0x10FFFF));
193    /// } else {
194    ///     assert!(matches!(usize::try_from(b), Err(_)));
195    /// }
196    /// ```
197    #[inline]
198    fn try_from(c: char) -> Result<usize, Self::Error> {
199        // FIXME(const-hack): this should use map_err instead
200        match usize::try_from(u32::from(c)) {
201            Ok(x) => Ok(x),
202            Err(_) => Err(TryFromCharError(())),
203        }
204    }
205}
206
207/// Maps a byte in `0x00..=0xFF` to a `char` whose code point has the same value from U+0000 to U+00FF
208/// (inclusive).
209///
210/// Unicode is designed such that this effectively decodes bytes
211/// with the character encoding that IANA calls ISO-8859-1.
212/// This encoding is compatible with ASCII.
213///
214/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
215/// which leaves some "blanks", byte values that are not assigned to any character.
216/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
217///
218/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
219/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
220/// to punctuation and various Latin characters.
221///
222/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
223/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
224/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
225/// C0 and C1 control codes.
226#[stable(feature = "char_convert", since = "1.13.0")]
227#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
228impl const From<u8> for char {
229    /// Converts a [`u8`] into a [`char`].
230    ///
231    /// # Examples
232    ///
233    /// ```
234    /// let u = 32 as u8;
235    /// let c = char::from(u);
236    ///
237    /// assert!(4 == size_of_val(&c))
238    /// ```
239    #[inline]
240    fn from(i: u8) -> Self {
241        i as char
242    }
243}
244
/// An error which can be returned when parsing a char.
///
/// This `struct` is created when using the [`char::from_str`] method.
#[cfg(not(feature = "ferrocene_subset"))]
#[stable(feature = "char_from_str", since = "1.20.0")]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseCharError {
    // Reason the parse failed; private, so it is only surfaced through the `Display` message.
    kind: CharErrorKind,
}
254
// Private classification of the ways parsing a `char` from a string can fail.
#[cfg(not(feature = "ferrocene_subset"))]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    // The input string contained no characters.
    EmptyString,
    // The input string contained more than one character.
    TooManyChars,
}
261
// Marks `ParseCharError` as an error type; no trait methods are overridden here.
#[cfg(not(feature = "ferrocene_subset"))]
#[stable(feature = "char_from_str", since = "1.20.0")]
impl Error for ParseCharError {}
265
266#[cfg(not(feature = "ferrocene_subset"))]
267#[stable(feature = "char_from_str", since = "1.20.0")]
268impl fmt::Display for ParseCharError {
269    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
270        match self.kind {
271            CharErrorKind::EmptyString => "cannot parse char from empty string",
272            CharErrorKind::TooManyChars => "too many characters in string",
273        }
274        .fmt(f)
275    }
276}
277
278#[cfg(not(feature = "ferrocene_subset"))]
279#[stable(feature = "char_from_str", since = "1.20.0")]
280impl FromStr for char {
281    type Err = ParseCharError;
282
283    #[inline]
284    fn from_str(s: &str) -> Result<Self, Self::Err> {
285        let mut chars = s.chars();
286        match (chars.next(), chars.next()) {
287            (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
288            (Some(c), None) => Ok(c),
289            _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
290        }
291    }
292}
293
294#[inline]
295#[allow(unnecessary_transmutes)]
296const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
297    // This is an optimized version of the check
298    // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF),
299    // which can also be written as
300    // i >= 0x110000 || (i >= 0xD800 && i < 0xE000).
301    //
302    // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is
303    // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same.
304    // In particular, numbers >= 0x110000 stay in this range.
305    //
306    // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single
307    // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped
308    // surrogate range as well as the numbers originally larger than 0x110000.
309    if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 {
310        Err(CharTryFromError(()))
311    } else {
312        // SAFETY: checked that it's a legal unicode value
313        Ok(unsafe { transmute(i) })
314    }
315}
316
#[cfg(not(feature = "ferrocene_subset"))]
#[stable(feature = "try_from", since = "1.34.0")]
#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
impl const TryFrom<u32> for char {
    type Error = CharTryFromError;

    /// Tries to convert a [`u32`] into a [`char`].
    ///
    /// Delegates to `char_try_from_u32`, so the result is `Err` exactly when that
    /// function rejects `i`.
    #[inline]
    fn try_from(i: u32) -> Result<Self, Self::Error> {
        char_try_from_u32(i)
    }
}
328
/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
///
/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
/// See its documentation for more.
#[stable(feature = "try_from", since = "1.34.0")]
// The derives are only applied when the `ferrocene_subset` feature is disabled.
#[cfg_attr(not(feature = "ferrocene_subset"), derive(Copy, Clone, Debug, PartialEq, Eq))]
// The single `()` field is private, so the type carries no data and cannot be built by
// code that has no access to that field.
pub struct CharTryFromError(());
336
337#[cfg(not(feature = "ferrocene_subset"))]
338#[stable(feature = "try_from", since = "1.34.0")]
339impl fmt::Display for CharTryFromError {
340    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
341        "converted integer out of range for `char`".fmt(f)
342    }
343}
344
345/// Converts a digit in the given radix to a `char`. See [`char::from_digit`].
346#[cfg(not(feature = "ferrocene_subset"))]
347#[inline]
348#[must_use]
349pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
350    if radix > 36 {
351        panic!("from_digit: radix is too high (maximum 36)");
352    }
353    if num < radix {
354        let num = num as u8;
355        if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
356    } else {
357        None
358    }
359}