// core/char/convert.rs

//! Character conversions.
2
3use crate::char::TryFromCharError;
4use crate::error::Error;
5use crate::fmt;
6use crate::mem::transmute;
7use crate::str::FromStr;
8use crate::ub_checks::assert_unsafe_precondition;
9
10/// Converts a `u32` to a `char`. See [`char::from_u32`].
11#[must_use]
12#[inline]
13pub(super) const fn from_u32(i: u32) -> Option<char> {
14    // FIXME(const-hack): once Result::ok is const fn, use it here
15    match char_try_from_u32(i) {
16        Ok(c) => Some(c),
17        Err(_) => None,
18    }
19}
20
21/// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`].
22#[inline]
23#[must_use]
24#[allow(unnecessary_transmutes)]
25#[track_caller]
26#[ferrocene::prevalidated]
27pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
28    // SAFETY: the caller must guarantee that `i` is a valid char value.
29    unsafe {
30        assert_unsafe_precondition!(
31            check_language_ub,
32            "invalid value for `char`",
33            (i: u32 = i) => char_try_from_u32(i).is_ok()
34        );
35        transmute(i)
36    }
37}
38
39#[stable(feature = "char_convert", since = "1.13.0")]
40#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
41impl const From<char> for u32 {
42    /// Converts a [`char`] into a [`u32`].
43    ///
44    /// # Examples
45    ///
46    /// ```
47    /// let c = 'c';
48    /// let u = u32::from(c);
49    ///
50    /// assert!(4 == size_of_val(&u))
51    /// ```
52    #[inline]
53    fn from(c: char) -> Self {
54        c as u32
55    }
56}
57
58#[stable(feature = "more_char_conversions", since = "1.51.0")]
59#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
60impl const From<char> for u64 {
61    /// Converts a [`char`] into a [`u64`].
62    ///
63    /// # Examples
64    ///
65    /// ```
66    /// let c = '👤';
67    /// let u = u64::from(c);
68    ///
69    /// assert!(8 == size_of_val(&u))
70    /// ```
71    #[inline]
72    fn from(c: char) -> Self {
73        // The char is casted to the value of the code point, then zero-extended to 64 bit.
74        // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
75        c as u64
76    }
77}
78
79#[stable(feature = "more_char_conversions", since = "1.51.0")]
80#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
81impl const From<char> for u128 {
82    /// Converts a [`char`] into a [`u128`].
83    ///
84    /// # Examples
85    ///
86    /// ```
87    /// let c = 'âš™';
88    /// let u = u128::from(c);
89    ///
90    /// assert!(16 == size_of_val(&u))
91    /// ```
92    #[inline]
93    fn from(c: char) -> Self {
94        // The char is casted to the value of the code point, then zero-extended to 128 bit.
95        // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
96        c as u128
97    }
98}
99
100/// Maps a `char` with a code point from U+0000 to U+00FF (inclusive) to a byte in `0x00..=0xFF` with
101/// the same value, failing if the code point is greater than U+00FF.
102///
103/// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding.
104#[stable(feature = "u8_from_char", since = "1.59.0")]
105#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
106impl const TryFrom<char> for u8 {
107    type Error = TryFromCharError;
108
109    /// Tries to convert a [`char`] into a [`u8`].
110    ///
111    /// # Examples
112    ///
113    /// ```
114    /// let a = 'ÿ'; // U+00FF
115    /// let b = 'Ä€'; // U+0100
116    ///
117    /// assert_eq!(u8::try_from(a), Ok(0xFF_u8));
118    /// assert!(u8::try_from(b).is_err());
119    /// ```
120    #[inline]
121    fn try_from(c: char) -> Result<u8, Self::Error> {
122        // FIXME(const-hack): this should use map_err instead
123        match u8::try_from(u32::from(c)) {
124            Ok(b) => Ok(b),
125            Err(_) => Err(TryFromCharError(())),
126        }
127    }
128}
129
130/// Maps a `char` with a code point from U+0000 to U+FFFF (inclusive) to a `u16` in `0x0000..=0xFFFF`
131/// with the same value, failing if the code point is greater than U+FFFF.
132///
133/// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003.
134#[stable(feature = "u16_from_char", since = "1.74.0")]
135#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
136impl const TryFrom<char> for u16 {
137    type Error = TryFromCharError;
138
139    /// Tries to convert a [`char`] into a [`u16`].
140    ///
141    /// # Examples
142    ///
143    /// ```
144    /// let trans_rights = 'âš§'; // U+26A7
145    /// let ninjas = '🥷'; // U+1F977
146    ///
147    /// assert_eq!(u16::try_from(trans_rights), Ok(0x26A7_u16));
148    /// assert!(u16::try_from(ninjas).is_err());
149    /// ```
150    #[inline]
151    fn try_from(c: char) -> Result<u16, Self::Error> {
152        // FIXME(const-hack): this should use map_err instead
153        match u16::try_from(u32::from(c)) {
154            Ok(x) => Ok(x),
155            Err(_) => Err(TryFromCharError(())),
156        }
157    }
158}
159
160/// Maps a `char` with a code point from U+0000 to U+10FFFF (inclusive) to a `usize` in
161/// `0x0000..=0x10FFFF` with the same value, failing if the final value is unrepresentable by
162/// `usize`.
163///
164/// Generally speaking, this conversion can be seen as obtaining the character's corresponding
165/// UTF-32 code point to the extent representable by pointer addresses.
166#[stable(feature = "usize_try_from_char", since = "1.94.0")]
167#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
168impl const TryFrom<char> for usize {
169    type Error = TryFromCharError;
170
171    /// Tries to convert a [`char`] into a [`usize`].
172    ///
173    /// # Examples
174    ///
175    /// ```
176    /// let a = '\u{FFFF}'; // Always succeeds.
177    /// let b = '\u{10FFFF}'; // Conditionally succeeds.
178    ///
179    /// assert_eq!(usize::try_from(a), Ok(0xFFFF));
180    ///
181    /// if size_of::<usize>() >= size_of::<u32>() {
182    ///     assert_eq!(usize::try_from(b), Ok(0x10FFFF));
183    /// } else {
184    ///     assert!(matches!(usize::try_from(b), Err(_)));
185    /// }
186    /// ```
187    #[inline]
188    fn try_from(c: char) -> Result<usize, Self::Error> {
189        // FIXME(const-hack): this should use map_err instead
190        match usize::try_from(u32::from(c)) {
191            Ok(x) => Ok(x),
192            Err(_) => Err(TryFromCharError(())),
193        }
194    }
195}
196
197/// Maps a byte in `0x00..=0xFF` to a `char` whose code point has the same value from U+0000 to U+00FF
198/// (inclusive).
199///
200/// Unicode is designed such that this effectively decodes bytes
201/// with the character encoding that IANA calls ISO-8859-1.
202/// This encoding is compatible with ASCII.
203///
204/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
205/// which leaves some "blanks", byte values that are not assigned to any character.
206/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
207///
208/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
209/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
210/// to punctuation and various Latin characters.
211///
212/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
213/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
214/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
215/// C0 and C1 control codes.
216#[stable(feature = "char_convert", since = "1.13.0")]
217#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
218impl const From<u8> for char {
219    /// Converts a [`u8`] into a [`char`].
220    ///
221    /// # Examples
222    ///
223    /// ```
224    /// let u = 32 as u8;
225    /// let c = char::from(u);
226    ///
227    /// assert!(4 == size_of_val(&c))
228    /// ```
229    #[inline]
230    #[ferrocene::prevalidated]
231    fn from(i: u8) -> Self {
232        i as char
233    }
234}
235
236/// An error which can be returned when parsing a char.
237///
238/// This `struct` is created when using the [`char::from_str`] method.
239#[stable(feature = "char_from_str", since = "1.20.0")]
240#[derive(Clone, Debug, PartialEq, Eq)]
241pub struct ParseCharError {
242    kind: CharErrorKind,
243}
244
/// The reason a string could not be parsed as a single `char`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum CharErrorKind {
    /// The input yielded no characters at all.
    EmptyString,
    /// The input yielded more than one character.
    TooManyChars,
}
250
251#[stable(feature = "char_from_str", since = "1.20.0")]
252impl Error for ParseCharError {}
253
254#[stable(feature = "char_from_str", since = "1.20.0")]
255impl fmt::Display for ParseCharError {
256    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
257        match self.kind {
258            CharErrorKind::EmptyString => "cannot parse char from empty string",
259            CharErrorKind::TooManyChars => "too many characters in string",
260        }
261        .fmt(f)
262    }
263}
264
265#[stable(feature = "char_from_str", since = "1.20.0")]
266impl FromStr for char {
267    type Err = ParseCharError;
268
269    #[inline]
270    fn from_str(s: &str) -> Result<Self, Self::Err> {
271        let mut chars = s.chars();
272        match (chars.next(), chars.next()) {
273            (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
274            (Some(c), None) => Ok(c),
275            _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
276        }
277    }
278}
279
280#[inline]
281#[allow(unnecessary_transmutes)]
282#[ferrocene::prevalidated]
283const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
284    // This is an optimized version of the check
285    // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF),
286    // which can also be written as
287    // i >= 0x110000 || (i >= 0xD800 && i < 0xE000).
288    //
289    // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is
290    // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same.
291    // In particular, numbers >= 0x110000 stay in this range.
292    //
293    // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single
294    // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped
295    // surrogate range as well as the numbers originally larger than 0x110000.
296    if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 {
297        Err(CharTryFromError(()))
298    } else {
299        // SAFETY: checked that it's a legal unicode value
300        Ok(unsafe { transmute(i) })
301    }
302}
303
304#[stable(feature = "try_from", since = "1.34.0")]
305#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
306impl const TryFrom<u32> for char {
307    type Error = CharTryFromError;
308
309    #[inline]
310    fn try_from(i: u32) -> Result<Self, Self::Error> {
311        char_try_from_u32(i)
312    }
313}
314
315/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
316///
317/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
318/// See its documentation for more.
319#[stable(feature = "try_from", since = "1.34.0")]
320#[derive(Copy, Clone, Debug, PartialEq, Eq)]
321#[ferrocene::prevalidated]
322pub struct CharTryFromError(());
323
324#[stable(feature = "try_from", since = "1.34.0")]
325impl fmt::Display for CharTryFromError {
326    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
327        "converted integer out of range for `char`".fmt(f)
328    }
329}
330
331/// Converts a digit in the given radix to a `char`. See [`char::from_digit`].
332#[inline]
333#[must_use]
334pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
335    if radix > 36 {
336        panic!("from_digit: radix is too high (maximum 36)");
337    }
338    if num < radix {
339        let num = num as u8;
340        if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
341    } else {
342        None
343    }
344}