core/char/convert.rs
1//! Character conversions.
2
3#[cfg(not(feature = "ferrocene_subset"))]
4use crate::char::TryFromCharError;
5#[cfg(not(feature = "ferrocene_subset"))]
6use crate::error::Error;
7#[cfg(not(feature = "ferrocene_subset"))]
8use crate::fmt;
9use crate::mem::transmute;
10#[cfg(not(feature = "ferrocene_subset"))]
11use crate::str::FromStr;
12use crate::ub_checks::assert_unsafe_precondition;
13
14/// Converts a `u32` to a `char`. See [`char::from_u32`].
15#[cfg(not(feature = "ferrocene_subset"))]
16#[must_use]
17#[inline]
18pub(super) const fn from_u32(i: u32) -> Option<char> {
19 // FIXME(const-hack): once Result::ok is const fn, use it here
20 match char_try_from_u32(i) {
21 Ok(c) => Some(c),
22 Err(_) => None,
23 }
24}
25
/// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`].
///
/// # Safety
///
/// The caller must guarantee that `i` is a valid `char` value, i.e. one that
/// `char_try_from_u32` accepts.
#[inline]
#[must_use]
#[allow(unnecessary_transmutes)]
#[track_caller]
pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
    // SAFETY: the caller must guarantee that `i` is a valid char value.
    unsafe {
        // Precondition check with the message "invalid value for `char`"; the
        // condition is that `char_try_from_u32(i)` succeeds.
        // NOTE(review): `check_language_ub` presumably selects when the check is
        // active — confirm against `crate::ub_checks`.
        assert_unsafe_precondition!(
            check_language_ub,
            "invalid value for `char`",
            (i: u32 = i) => char_try_from_u32(i).is_ok()
        );
        // Reinterpret the `u32` as a `char` without any further validation.
        transmute(i)
    }
}
42
43#[cfg(not(feature = "ferrocene_subset"))]
44#[stable(feature = "char_convert", since = "1.13.0")]
45#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
46impl const From<char> for u32 {
47 /// Converts a [`char`] into a [`u32`].
48 ///
49 /// # Examples
50 ///
51 /// ```
52 /// let c = 'c';
53 /// let u = u32::from(c);
54 ///
55 /// assert!(4 == size_of_val(&u))
56 /// ```
57 #[inline]
58 fn from(c: char) -> Self {
59 c as u32
60 }
61}
62
63#[cfg(not(feature = "ferrocene_subset"))]
64#[stable(feature = "more_char_conversions", since = "1.51.0")]
65#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
66impl const From<char> for u64 {
67 /// Converts a [`char`] into a [`u64`].
68 ///
69 /// # Examples
70 ///
71 /// ```
72 /// let c = '👤';
73 /// let u = u64::from(c);
74 ///
75 /// assert!(8 == size_of_val(&u))
76 /// ```
77 #[inline]
78 fn from(c: char) -> Self {
79 // The char is casted to the value of the code point, then zero-extended to 64 bit.
80 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
81 c as u64
82 }
83}
84
85#[cfg(not(feature = "ferrocene_subset"))]
86#[stable(feature = "more_char_conversions", since = "1.51.0")]
87#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
88impl const From<char> for u128 {
89 /// Converts a [`char`] into a [`u128`].
90 ///
91 /// # Examples
92 ///
93 /// ```
94 /// let c = 'âš™';
95 /// let u = u128::from(c);
96 ///
97 /// assert!(16 == size_of_val(&u))
98 /// ```
99 #[inline]
100 fn from(c: char) -> Self {
101 // The char is casted to the value of the code point, then zero-extended to 128 bit.
102 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
103 c as u128
104 }
105}
106
107/// Maps a `char` with a code point from U+0000 to U+00FF (inclusive) to a byte in `0x00..=0xFF` with
108/// the same value, failing if the code point is greater than U+00FF.
109///
110/// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding.
111#[cfg(not(feature = "ferrocene_subset"))]
112#[stable(feature = "u8_from_char", since = "1.59.0")]
113#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
114impl const TryFrom<char> for u8 {
115 type Error = TryFromCharError;
116
117 /// Tries to convert a [`char`] into a [`u8`].
118 ///
119 /// # Examples
120 ///
121 /// ```
122 /// let a = 'ÿ'; // U+00FF
123 /// let b = 'Ä€'; // U+0100
124 ///
125 /// assert_eq!(u8::try_from(a), Ok(0xFF_u8));
126 /// assert!(u8::try_from(b).is_err());
127 /// ```
128 #[inline]
129 fn try_from(c: char) -> Result<u8, Self::Error> {
130 // FIXME(const-hack): this should use map_err instead
131 match u8::try_from(u32::from(c)) {
132 Ok(b) => Ok(b),
133 Err(_) => Err(TryFromCharError(())),
134 }
135 }
136}
137
138/// Maps a `char` with a code point from U+0000 to U+FFFF (inclusive) to a `u16` in `0x0000..=0xFFFF`
139/// with the same value, failing if the code point is greater than U+FFFF.
140///
141/// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003.
142#[cfg(not(feature = "ferrocene_subset"))]
143#[stable(feature = "u16_from_char", since = "1.74.0")]
144#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
145impl const TryFrom<char> for u16 {
146 type Error = TryFromCharError;
147
148 /// Tries to convert a [`char`] into a [`u16`].
149 ///
150 /// # Examples
151 ///
152 /// ```
153 /// let trans_rights = 'âš§'; // U+26A7
154 /// let ninjas = '🥷'; // U+1F977
155 ///
156 /// assert_eq!(u16::try_from(trans_rights), Ok(0x26A7_u16));
157 /// assert!(u16::try_from(ninjas).is_err());
158 /// ```
159 #[inline]
160 fn try_from(c: char) -> Result<u16, Self::Error> {
161 // FIXME(const-hack): this should use map_err instead
162 match u16::try_from(u32::from(c)) {
163 Ok(x) => Ok(x),
164 Err(_) => Err(TryFromCharError(())),
165 }
166 }
167}
168
169/// Maps a `char` with a code point from U+0000 to U+10FFFF (inclusive) to a `usize` in
170/// `0x0000..=0x10FFFF` with the same value, failing if the final value is unrepresentable by
171/// `usize`.
172///
173/// Generally speaking, this conversion can be seen as obtaining the character's corresponding
174/// UTF-32 code point to the extent representable by pointer addresses.
175#[cfg(not(feature = "ferrocene_subset"))]
176#[stable(feature = "usize_try_from_char", since = "CURRENT_RUSTC_VERSION")]
177#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
178impl const TryFrom<char> for usize {
179 type Error = TryFromCharError;
180
181 /// Tries to convert a [`char`] into a [`usize`].
182 ///
183 /// # Examples
184 ///
185 /// ```
186 /// let a = '\u{FFFF}'; // Always succeeds.
187 /// let b = '\u{10FFFF}'; // Conditionally succeeds.
188 ///
189 /// assert_eq!(usize::try_from(a), Ok(0xFFFF));
190 ///
191 /// if size_of::<usize>() >= size_of::<u32>() {
192 /// assert_eq!(usize::try_from(b), Ok(0x10FFFF));
193 /// } else {
194 /// assert!(matches!(usize::try_from(b), Err(_)));
195 /// }
196 /// ```
197 #[inline]
198 fn try_from(c: char) -> Result<usize, Self::Error> {
199 // FIXME(const-hack): this should use map_err instead
200 match usize::try_from(u32::from(c)) {
201 Ok(x) => Ok(x),
202 Err(_) => Err(TryFromCharError(())),
203 }
204 }
205}
206
207/// Maps a byte in `0x00..=0xFF` to a `char` whose code point has the same value from U+0000 to U+00FF
208/// (inclusive).
209///
210/// Unicode is designed such that this effectively decodes bytes
211/// with the character encoding that IANA calls ISO-8859-1.
212/// This encoding is compatible with ASCII.
213///
214/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
215/// which leaves some "blanks", byte values that are not assigned to any character.
216/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
217///
218/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
219/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
220/// to punctuation and various Latin characters.
221///
222/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
223/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
224/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
225/// C0 and C1 control codes.
226#[stable(feature = "char_convert", since = "1.13.0")]
227#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
228impl const From<u8> for char {
229 /// Converts a [`u8`] into a [`char`].
230 ///
231 /// # Examples
232 ///
233 /// ```
234 /// let u = 32 as u8;
235 /// let c = char::from(u);
236 ///
237 /// assert!(4 == size_of_val(&c))
238 /// ```
239 #[inline]
240 fn from(i: u8) -> Self {
241 i as char
242 }
243}
244
/// An error which can be returned when parsing a char.
///
/// This `struct` is created when using the [`char::from_str`] method.
#[cfg(not(feature = "ferrocene_subset"))]
#[stable(feature = "char_from_str", since = "1.20.0")]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseCharError {
    // Private discriminator recording which parse failure occurred.
    kind: CharErrorKind,
}
254
// Private reason code stored inside `ParseCharError`.
#[cfg(not(feature = "ferrocene_subset"))]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    // The input string yielded no characters.
    EmptyString,
    // The input string yielded more than one character.
    TooManyChars,
}
261
// Marks `ParseCharError` as an error type; the impl body is empty, so no
// trait items are overridden here.
#[cfg(not(feature = "ferrocene_subset"))]
#[stable(feature = "char_from_str", since = "1.20.0")]
impl Error for ParseCharError {}
265
266#[cfg(not(feature = "ferrocene_subset"))]
267#[stable(feature = "char_from_str", since = "1.20.0")]
268impl fmt::Display for ParseCharError {
269 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
270 match self.kind {
271 CharErrorKind::EmptyString => "cannot parse char from empty string",
272 CharErrorKind::TooManyChars => "too many characters in string",
273 }
274 .fmt(f)
275 }
276}
277
278#[cfg(not(feature = "ferrocene_subset"))]
279#[stable(feature = "char_from_str", since = "1.20.0")]
280impl FromStr for char {
281 type Err = ParseCharError;
282
283 #[inline]
284 fn from_str(s: &str) -> Result<Self, Self::Err> {
285 let mut chars = s.chars();
286 match (chars.next(), chars.next()) {
287 (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
288 (Some(c), None) => Ok(c),
289 _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
290 }
291 }
292}
293
294#[inline]
295#[allow(unnecessary_transmutes)]
296const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
297 // This is an optimized version of the check
298 // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF),
299 // which can also be written as
300 // i >= 0x110000 || (i >= 0xD800 && i < 0xE000).
301 //
302 // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is
303 // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same.
304 // In particular, numbers >= 0x110000 stay in this range.
305 //
306 // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single
307 // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped
308 // surrogate range as well as the numbers originally larger than 0x110000.
309 if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 {
310 Err(CharTryFromError(()))
311 } else {
312 // SAFETY: checked that it's a legal unicode value
313 Ok(unsafe { transmute(i) })
314 }
315}
316
// Fallible `u32` -> `char` conversion; the work is done by `char_try_from_u32`.
#[cfg(not(feature = "ferrocene_subset"))]
#[stable(feature = "try_from", since = "1.34.0")]
#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
impl const TryFrom<u32> for char {
    type Error = CharTryFromError;

    // Returns whatever `char_try_from_u32` returns for `i`, unchanged.
    #[inline]
    fn try_from(i: u32) -> Result<Self, Self::Error> {
        char_try_from_u32(i)
    }
}
328
/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
///
/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
/// See its documentation for more.
#[stable(feature = "try_from", since = "1.34.0")]
// The derives below are only applied when the `ferrocene_subset` feature is off.
#[cfg_attr(not(feature = "ferrocene_subset"), derive(Copy, Clone, Debug, PartialEq, Eq))]
// The single private `()` field carries no data.
pub struct CharTryFromError(());
336
337#[cfg(not(feature = "ferrocene_subset"))]
338#[stable(feature = "try_from", since = "1.34.0")]
339impl fmt::Display for CharTryFromError {
340 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
341 "converted integer out of range for `char`".fmt(f)
342 }
343}
344
345/// Converts a digit in the given radix to a `char`. See [`char::from_digit`].
346#[cfg(not(feature = "ferrocene_subset"))]
347#[inline]
348#[must_use]
349pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
350 if radix > 36 {
351 panic!("from_digit: radix is too high (maximum 36)");
352 }
353 if num < radix {
354 let num = num as u8;
355 if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
356 } else {
357 None
358 }
359}