core/char/
convert.rs

1//! Character conversions.
2
3#[cfg(not(feature = "ferrocene_certified"))]
4use crate::char::TryFromCharError;
5#[cfg(not(feature = "ferrocene_certified"))]
6use crate::error::Error;
7#[cfg(not(feature = "ferrocene_certified"))]
8use crate::fmt;
9use crate::mem::transmute;
10#[cfg(not(feature = "ferrocene_certified"))]
11use crate::str::FromStr;
12use crate::ub_checks::assert_unsafe_precondition;
13
14/// Converts a `u32` to a `char`. See [`char::from_u32`].
15#[cfg(not(feature = "ferrocene_certified"))]
16#[must_use]
17#[inline]
18pub(super) const fn from_u32(i: u32) -> Option<char> {
19    // FIXME(const-hack): once Result::ok is const fn, use it here
20    match char_try_from_u32(i) {
21        Ok(c) => Some(c),
22        Err(_) => None,
23    }
24}
25
26/// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`].
27#[inline]
28#[must_use]
29#[allow(unnecessary_transmutes)]
30#[track_caller]
31pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
32    // SAFETY: the caller must guarantee that `i` is a valid char value.
33    unsafe {
34        assert_unsafe_precondition!(
35            check_language_ub,
36            "invalid value for `char`",
37            (i: u32 = i) => char_try_from_u32(i).is_ok()
38        );
39        transmute(i)
40    }
41}
42
43#[cfg(not(feature = "ferrocene_certified"))]
44#[stable(feature = "char_convert", since = "1.13.0")]
45#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
46impl const From<char> for u32 {
47    /// Converts a [`char`] into a [`u32`].
48    ///
49    /// # Examples
50    ///
51    /// ```
52    /// let c = 'c';
53    /// let u = u32::from(c);
54    /// assert!(4 == size_of_val(&u))
55    /// ```
56    #[inline]
57    fn from(c: char) -> Self {
58        c as u32
59    }
60}
61
62#[cfg(not(feature = "ferrocene_certified"))]
63#[stable(feature = "more_char_conversions", since = "1.51.0")]
64#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
65impl const From<char> for u64 {
66    /// Converts a [`char`] into a [`u64`].
67    ///
68    /// # Examples
69    ///
70    /// ```
71    /// let c = '👤';
72    /// let u = u64::from(c);
73    /// assert!(8 == size_of_val(&u))
74    /// ```
75    #[inline]
76    fn from(c: char) -> Self {
77        // The char is casted to the value of the code point, then zero-extended to 64 bit.
78        // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
79        c as u64
80    }
81}
82
83#[cfg(not(feature = "ferrocene_certified"))]
84#[stable(feature = "more_char_conversions", since = "1.51.0")]
85#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
86impl const From<char> for u128 {
87    /// Converts a [`char`] into a [`u128`].
88    ///
89    /// # Examples
90    ///
91    /// ```
92    /// let c = 'âš™';
93    /// let u = u128::from(c);
94    /// assert!(16 == size_of_val(&u))
95    /// ```
96    #[inline]
97    fn from(c: char) -> Self {
98        // The char is casted to the value of the code point, then zero-extended to 128 bit.
99        // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
100        c as u128
101    }
102}
103
104/// Maps a `char` with code point in U+0000..=U+00FF to a byte in 0x00..=0xFF with same value,
105/// failing if the code point is greater than U+00FF.
106///
107/// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding.
108#[cfg(not(feature = "ferrocene_certified"))]
109#[stable(feature = "u8_from_char", since = "1.59.0")]
110#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
111impl const TryFrom<char> for u8 {
112    type Error = TryFromCharError;
113
114    /// Tries to convert a [`char`] into a [`u8`].
115    ///
116    /// # Examples
117    ///
118    /// ```
119    /// let a = 'ÿ'; // U+00FF
120    /// let b = 'Ä€'; // U+0100
121    /// assert_eq!(u8::try_from(a), Ok(0xFF_u8));
122    /// assert!(u8::try_from(b).is_err());
123    /// ```
124    #[inline]
125    fn try_from(c: char) -> Result<u8, Self::Error> {
126        // FIXME(const-hack): this should use map_err instead
127        match u8::try_from(u32::from(c)) {
128            Ok(b) => Ok(b),
129            Err(_) => Err(TryFromCharError(())),
130        }
131    }
132}
133
134/// Maps a `char` with code point in U+0000..=U+FFFF to a `u16` in 0x0000..=0xFFFF with same value,
135/// failing if the code point is greater than U+FFFF.
136///
137/// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003.
138#[cfg(not(feature = "ferrocene_certified"))]
139#[stable(feature = "u16_from_char", since = "1.74.0")]
140#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
141impl const TryFrom<char> for u16 {
142    type Error = TryFromCharError;
143
144    /// Tries to convert a [`char`] into a [`u16`].
145    ///
146    /// # Examples
147    ///
148    /// ```
149    /// let trans_rights = 'âš§'; // U+26A7
150    /// let ninjas = '🥷'; // U+1F977
151    /// assert_eq!(u16::try_from(trans_rights), Ok(0x26A7_u16));
152    /// assert!(u16::try_from(ninjas).is_err());
153    /// ```
154    #[inline]
155    fn try_from(c: char) -> Result<u16, Self::Error> {
156        // FIXME(const-hack): this should use map_err instead
157        match u16::try_from(u32::from(c)) {
158            Ok(x) => Ok(x),
159            Err(_) => Err(TryFromCharError(())),
160        }
161    }
162}
163
164/// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF.
165///
166/// Unicode is designed such that this effectively decodes bytes
167/// with the character encoding that IANA calls ISO-8859-1.
168/// This encoding is compatible with ASCII.
169///
170/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
171/// which leaves some "blanks", byte values that are not assigned to any character.
172/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
173///
174/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
175/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
176/// to punctuation and various Latin characters.
177///
178/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
179/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
180/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
181/// C0 and C1 control codes.
182#[cfg(not(feature = "ferrocene_certified"))]
183#[stable(feature = "char_convert", since = "1.13.0")]
184#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
185impl const From<u8> for char {
186    /// Converts a [`u8`] into a [`char`].
187    ///
188    /// # Examples
189    ///
190    /// ```
191    /// let u = 32 as u8;
192    /// let c = char::from(u);
193    /// assert!(4 == size_of_val(&c))
194    /// ```
195    #[inline]
196    fn from(i: u8) -> Self {
197        i as char
198    }
199}
200
201/// An error which can be returned when parsing a char.
202///
203/// This `struct` is created when using the [`char::from_str`] method.
204#[cfg(not(feature = "ferrocene_certified"))]
205#[stable(feature = "char_from_str", since = "1.20.0")]
206#[derive(Clone, Debug, PartialEq, Eq)]
207pub struct ParseCharError {
208    kind: CharErrorKind,
209}
210
/// The reason a string could not be parsed into a single `char`.
#[cfg(not(feature = "ferrocene_certified"))]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    /// The input string contained no characters at all.
    EmptyString,
    /// The input string contained more than one character.
    TooManyChars,
}
217
218#[cfg(not(feature = "ferrocene_certified"))]
219#[stable(feature = "char_from_str", since = "1.20.0")]
220impl Error for ParseCharError {}
221
222#[cfg(not(feature = "ferrocene_certified"))]
223#[stable(feature = "char_from_str", since = "1.20.0")]
224impl fmt::Display for ParseCharError {
225    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
226        match self.kind {
227            CharErrorKind::EmptyString => "cannot parse char from empty string",
228            CharErrorKind::TooManyChars => "too many characters in string",
229        }
230        .fmt(f)
231    }
232}
233
234#[cfg(not(feature = "ferrocene_certified"))]
235#[stable(feature = "char_from_str", since = "1.20.0")]
236impl FromStr for char {
237    type Err = ParseCharError;
238
239    #[inline]
240    fn from_str(s: &str) -> Result<Self, Self::Err> {
241        let mut chars = s.chars();
242        match (chars.next(), chars.next()) {
243            (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
244            (Some(c), None) => Ok(c),
245            _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
246        }
247    }
248}
249
250#[inline]
251#[allow(unnecessary_transmutes)]
252const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
253    // This is an optimized version of the check
254    // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF),
255    // which can also be written as
256    // i >= 0x110000 || (i >= 0xD800 && i < 0xE000).
257    //
258    // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is
259    // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same.
260    // In particular, numbers >= 0x110000 stay in this range.
261    //
262    // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single
263    // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped
264    // surrogate range as well as the numbers originally larger than 0x110000.
265    //
266    if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 {
267        Err(CharTryFromError(()))
268    } else {
269        // SAFETY: checked that it's a legal unicode value
270        Ok(unsafe { transmute(i) })
271    }
272}
273
274#[cfg(not(feature = "ferrocene_certified"))]
275#[stable(feature = "try_from", since = "1.34.0")]
276#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
277impl const TryFrom<u32> for char {
278    type Error = CharTryFromError;
279
280    #[inline]
281    fn try_from(i: u32) -> Result<Self, Self::Error> {
282        char_try_from_u32(i)
283    }
284}
285
286/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
287///
288/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
289/// See its documentation for more.
290#[stable(feature = "try_from", since = "1.34.0")]
291#[cfg_attr(not(feature = "ferrocene_certified"), derive(Copy, Clone, Debug, PartialEq, Eq))]
292pub struct CharTryFromError(());
293
294#[cfg(not(feature = "ferrocene_certified"))]
295#[stable(feature = "try_from", since = "1.34.0")]
296impl fmt::Display for CharTryFromError {
297    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
298        "converted integer out of range for `char`".fmt(f)
299    }
300}
301
302/// Converts a digit in the given radix to a `char`. See [`char::from_digit`].
303#[cfg(not(feature = "ferrocene_certified"))]
304#[inline]
305#[must_use]
306pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
307    if radix > 36 {
308        panic!("from_digit: radix is too high (maximum 36)");
309    }
310    if num < radix {
311        let num = num as u8;
312        if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
313    } else {
314        None
315    }
316}