core/char/methods.rs
1//! impl char {}
2
3use super::*;
4#[cfg(not(feature = "ferrocene_certified"))]
5use crate::panic::const_panic;
6#[cfg(not(feature = "ferrocene_certified"))]
7use crate::slice;
8#[cfg(not(feature = "ferrocene_certified"))]
9use crate::str::from_utf8_unchecked_mut;
10#[cfg(not(feature = "ferrocene_certified"))]
11use crate::ub_checks::assert_unsafe_precondition;
12#[cfg(not(feature = "ferrocene_certified"))]
13use crate::unicode::printable::is_printable;
14#[cfg(not(feature = "ferrocene_certified"))]
15use crate::unicode::{self, conversions};
16
17impl char {
18 /// The lowest valid code point a `char` can have, `'\0'`.
19 ///
20 /// Unlike integer types, `char` actually has a gap in the middle,
21 /// meaning that the range of possible `char`s is smaller than you
22 /// might expect. Ranges of `char` will automatically hop this gap
23 /// for you:
24 ///
25 /// ```
26 /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
27 /// let size = (char::MIN..=char::MAX).count() as u32;
28 /// assert!(size < dist);
29 /// ```
30 ///
31 /// Despite this gap, the `MIN` and [`MAX`] values can be used as bounds for
32 /// all `char` values.
33 ///
34 /// [`MAX`]: char::MAX
35 ///
36 /// # Examples
37 ///
38 /// ```
39 /// # fn something_which_returns_char() -> char { 'a' }
40 /// let c: char = something_which_returns_char();
41 /// assert!(char::MIN <= c);
42 ///
43 /// let value_at_min = u32::from(char::MIN);
44 /// assert_eq!(char::from_u32(value_at_min), Some('\0'));
45 /// ```
46 #[stable(feature = "char_min", since = "1.83.0")]
// U+0000 (NUL), written as a Unicode escape.
pub const MIN: char = '\u{0}';
48
49 /// The highest valid code point a `char` can have, `'\u{10FFFF}'`.
50 ///
51 /// Unlike integer types, `char` actually has a gap in the middle,
52 /// meaning that the range of possible `char`s is smaller than you
53 /// might expect. Ranges of `char` will automatically hop this gap
54 /// for you:
55 ///
56 /// ```
57 /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
58 /// let size = (char::MIN..=char::MAX).count() as u32;
59 /// assert!(size < dist);
60 /// ```
61 ///
62 /// Despite this gap, the [`MIN`] and `MAX` values can be used as bounds for
63 /// all `char` values.
64 ///
65 /// [`MIN`]: char::MIN
66 ///
67 /// # Examples
68 ///
69 /// ```
70 /// # fn something_which_returns_char() -> char { 'a' }
71 /// let c: char = something_which_returns_char();
72 /// assert!(c <= char::MAX);
73 ///
74 /// let value_at_max = u32::from(char::MAX);
75 /// assert_eq!(char::from_u32(value_at_max), Some('\u{10FFFF}'));
76 /// assert_eq!(char::from_u32(value_at_max + 1), None);
77 /// ```
78 #[stable(feature = "assoc_char_consts", since = "1.52.0")]
// U+10FFFF; hex digits in a Unicode escape are case-insensitive.
pub const MAX: char = '\u{10ffff}';
80
81 /// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
82 /// UTF-8 encoding.
83 #[stable(feature = "char_max_len_assoc", since = "CURRENT_RUSTC_VERSION")]
// Matches the `encode_utf8` documentation: a buffer of length four is large
// enough to encode any `char`.
pub const MAX_LEN_UTF8: usize = 4;
85
86 /// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
87 /// to UTF-16 encoding.
88 #[stable(feature = "char_max_len_assoc", since = "CURRENT_RUSTC_VERSION")]
// Matches the `encode_utf16` documentation: a buffer of length 2 is large
// enough to encode any `char`.
pub const MAX_LEN_UTF16: usize = 2;
90
91 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
92 /// decoding error.
93 ///
94 /// It can occur, for example, when giving ill-formed UTF-8 bytes to
95 /// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy).
96 #[stable(feature = "assoc_char_consts", since = "1.52.0")]
// U+FFFD; hex digits in a Unicode escape are case-insensitive.
pub const REPLACEMENT_CHARACTER: char = '\u{fffd}';
98
99 /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
100 /// `char` and `str` methods are based on.
101 ///
102 /// New versions of Unicode are released regularly and subsequently all methods
103 /// in the standard library depending on Unicode are updated. Therefore the
104 /// behavior of some `char` and `str` methods and the value of this constant
105 /// changes over time. This is *not* considered to be a breaking change.
106 ///
107 /// The version numbering scheme is explained in
108 /// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
109 #[stable(feature = "assoc_char_consts", since = "1.52.0")]
110 #[cfg(not(feature = "ferrocene_certified"))]
111 pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
112
113 /// Creates an iterator over the native endian UTF-16 encoded code points in `iter`,
114 /// returning unpaired surrogates as `Err`s.
115 ///
116 /// # Examples
117 ///
118 /// Basic usage:
119 ///
120 /// ```
121 /// // 𝄞mus<invalid>ic<invalid>
122 /// let v = [
123 /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
124 /// ];
125 ///
126 /// assert_eq!(
127 /// char::decode_utf16(v)
128 /// .map(|r| r.map_err(|e| e.unpaired_surrogate()))
129 /// .collect::<Vec<_>>(),
130 /// vec![
131 /// Ok('𝄞'),
132 /// Ok('m'), Ok('u'), Ok('s'),
133 /// Err(0xDD1E),
134 /// Ok('i'), Ok('c'),
135 /// Err(0xD834)
136 /// ]
137 /// );
138 /// ```
139 ///
140 /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
141 ///
142 /// ```
143 /// // 𝄞mus<invalid>ic<invalid>
144 /// let v = [
145 /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
146 /// ];
147 ///
148 /// assert_eq!(
149 /// char::decode_utf16(v)
150 /// .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER))
151 /// .collect::<String>(),
152 /// "𝄞mus�ic�"
153 /// );
154 /// ```
155 #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
156 #[inline]
157 #[cfg(not(feature = "ferrocene_certified"))]
158 pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
159 super::decode::decode_utf16(iter)
160 }
161
162 /// Converts a `u32` to a `char`.
163 ///
164 /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
165 /// [`as`](../std/keyword.as.html):
166 ///
167 /// ```
168 /// let c = '💯';
169 /// let i = c as u32;
170 ///
171 /// assert_eq!(128175, i);
172 /// ```
173 ///
174 /// However, the reverse is not true: not all valid [`u32`]s are valid
175 /// `char`s. `from_u32()` will return `None` if the input is not a valid value
176 /// for a `char`.
177 ///
178 /// For an unsafe version of this function which ignores these checks, see
179 /// [`from_u32_unchecked`].
180 ///
181 /// [`from_u32_unchecked`]: #method.from_u32_unchecked
182 ///
183 /// # Examples
184 ///
185 /// Basic usage:
186 ///
187 /// ```
188 /// let c = char::from_u32(0x2764);
189 ///
190 /// assert_eq!(Some('❤'), c);
191 /// ```
192 ///
193 /// Returning `None` when the input is not a valid `char`:
194 ///
195 /// ```
196 /// let c = char::from_u32(0x110000);
197 ///
198 /// assert_eq!(None, c);
199 /// ```
200 #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
201 #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
202 #[must_use]
203 #[inline]
204 #[cfg(not(feature = "ferrocene_certified"))]
pub const fn from_u32(i: u32) -> Option<char> {
    // The validity check is not implemented here; this forwards to `from_u32`
    // in the sibling `convert` module and returns its result unchanged.
    super::convert::from_u32(i)
}
208
209 /// Converts a `u32` to a `char`, ignoring validity.
210 ///
211 /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
212 /// `as`:
213 ///
214 /// ```
215 /// let c = '💯';
216 /// let i = c as u32;
217 ///
218 /// assert_eq!(128175, i);
219 /// ```
220 ///
221 /// However, the reverse is not true: not all valid [`u32`]s are valid
222 /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to
223 /// `char`, possibly creating an invalid one.
224 ///
225 /// # Safety
226 ///
227 /// This function is unsafe, as it may construct invalid `char` values.
228 ///
229 /// For a safe version of this function, see the [`from_u32`] function.
230 ///
231 /// [`from_u32`]: #method.from_u32
232 ///
233 /// # Examples
234 ///
235 /// Basic usage:
236 ///
237 /// ```
238 /// let c = unsafe { char::from_u32_unchecked(0x2764) };
239 ///
240 /// assert_eq!('❤', c);
241 /// ```
242 #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
243 #[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
244 #[must_use]
245 #[inline]
pub const unsafe fn from_u32_unchecked(i: u32) -> char {
    // No validation is performed in this wrapper; the value is handed straight
    // to the unchecked conversion in the sibling `convert` module.
    // SAFETY: the safety contract must be upheld by the caller.
    unsafe { super::convert::from_u32_unchecked(i) }
}
250
251 /// Converts a digit in the given radix to a `char`.
252 ///
253 /// A 'radix' here is sometimes also called a 'base'. A radix of two
254 /// indicates a binary number, a radix of ten, decimal, and a radix of
255 /// sixteen, hexadecimal, to give some common values. Arbitrary
256 /// radices are supported.
257 ///
258 /// `from_digit()` will return `None` if the input is not a digit in
259 /// the given radix.
260 ///
261 /// # Panics
262 ///
263 /// Panics if given a radix larger than 36.
264 ///
265 /// # Examples
266 ///
267 /// Basic usage:
268 ///
269 /// ```
270 /// let c = char::from_digit(4, 10);
271 ///
272 /// assert_eq!(Some('4'), c);
273 ///
274 /// // Decimal 11 is a single digit in base 16
275 /// let c = char::from_digit(11, 16);
276 ///
277 /// assert_eq!(Some('b'), c);
278 /// ```
279 ///
280 /// Returning `None` when the input is not a digit:
281 ///
282 /// ```
283 /// let c = char::from_digit(20, 10);
284 ///
285 /// assert_eq!(None, c);
286 /// ```
287 ///
288 /// Passing a large radix, causing a panic:
289 ///
290 /// ```should_panic
291 /// // this panics
292 /// let _c = char::from_digit(1, 37);
293 /// ```
294 #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
295 #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
296 #[must_use]
297 #[inline]
298 #[cfg(not(feature = "ferrocene_certified"))]
pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
    // Forwards to `from_digit` in the sibling `convert` module.
    // NOTE(review): the documented panic for a radix above 36 is presumably
    // raised there — it is not checked in this wrapper.
    super::convert::from_digit(num, radix)
}
302
303 /// Checks if a `char` is a digit in the given radix.
304 ///
305 /// A 'radix' here is sometimes also called a 'base'. A radix of two
306 /// indicates a binary number, a radix of ten, decimal, and a radix of
307 /// sixteen, hexadecimal, to give some common values. Arbitrary
308 /// radices are supported.
309 ///
310 /// Compared to [`is_numeric()`], this function only recognizes the characters
311 /// `0-9`, `a-z` and `A-Z`.
312 ///
313 /// 'Digit' is defined to be only the following characters:
314 ///
315 /// * `0-9`
316 /// * `a-z`
317 /// * `A-Z`
318 ///
319 /// For a more comprehensive understanding of 'digit', see [`is_numeric()`].
320 ///
321 /// [`is_numeric()`]: #method.is_numeric
322 ///
323 /// # Panics
324 ///
325 /// Panics if given a radix smaller than 2 or larger than 36.
326 ///
327 /// # Examples
328 ///
329 /// Basic usage:
330 ///
331 /// ```
332 /// assert!('1'.is_digit(10));
333 /// assert!('f'.is_digit(16));
334 /// assert!(!'f'.is_digit(10));
335 /// ```
336 ///
337 /// Passing a large radix, causing a panic:
338 ///
339 /// ```should_panic
340 /// // this panics
341 /// '1'.is_digit(37);
342 /// ```
343 ///
344 /// Passing a small radix, causing a panic:
345 ///
346 /// ```should_panic
347 /// // this panics
348 /// '1'.is_digit(1);
349 /// ```
350 #[stable(feature = "rust1", since = "1.0.0")]
351 #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
352 #[inline]
353 #[cfg(not(feature = "ferrocene_certified"))]
pub const fn is_digit(self, radix: u32) -> bool {
    // Defined in terms of `to_digit`, so both methods always agree on what
    // counts as a digit and share the same radix assertion (2 to 36 inclusive).
    self.to_digit(radix).is_some()
}
357
358 /// Converts a `char` to a digit in the given radix.
359 ///
360 /// A 'radix' here is sometimes also called a 'base'. A radix of two
361 /// indicates a binary number, a radix of ten, decimal, and a radix of
362 /// sixteen, hexadecimal, to give some common values. Arbitrary
363 /// radices are supported.
364 ///
365 /// 'Digit' is defined to be only the following characters:
366 ///
367 /// * `0-9`
368 /// * `a-z`
369 /// * `A-Z`
370 ///
371 /// # Errors
372 ///
373 /// Returns `None` if the `char` does not refer to a digit in the given radix.
374 ///
375 /// # Panics
376 ///
377 /// Panics if given a radix smaller than 2 or larger than 36.
378 ///
379 /// # Examples
380 ///
381 /// Basic usage:
382 ///
383 /// ```
384 /// assert_eq!('1'.to_digit(10), Some(1));
385 /// assert_eq!('f'.to_digit(16), Some(15));
386 /// ```
387 ///
388 /// Passing a non-digit results in failure:
389 ///
390 /// ```
391 /// assert_eq!('f'.to_digit(10), None);
392 /// assert_eq!('z'.to_digit(16), None);
393 /// ```
394 ///
395 /// Passing a large radix, causing a panic:
396 ///
397 /// ```should_panic
398 /// // this panics
399 /// let _ = '1'.to_digit(37);
400 /// ```
///
/// Passing a small radix, causing a panic:
402 ///
403 /// ```should_panic
404 /// // this panics
405 /// let _ = '1'.to_digit(1);
406 /// ```
407 #[stable(feature = "rust1", since = "1.0.0")]
408 #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
409 #[rustc_diagnostic_item = "char_to_digit"]
410 #[must_use = "this returns the result of the operation, \
411 without modifying the original"]
412 #[inline]
413 pub const fn to_digit(self, radix: u32) -> Option<u32> {
414 assert!(
415 radix >= 2 && radix <= 36,
416 "to_digit: invalid radix -- radix must be in the range 2 to 36 inclusive"
417 );
418 // check radix to remove letter handling code when radix is a known constant
419 let value = if self > '9' && radix > 10 {
420 // mask to convert ASCII letters to uppercase
421 const TO_UPPERCASE_MASK: u32 = !0b0010_0000;
422 // Converts an ASCII letter to its corresponding integer value:
423 // A-Z => 10-35, a-z => 10-35. Other characters produce values >= 36.
424 //
425 // Add Overflow Safety:
426 // By applying the mask after the subtraction, the first addendum is
427 // constrained such that it never exceeds u32::MAX - 0x20.
428 ((self as u32).wrapping_sub('A' as u32) & TO_UPPERCASE_MASK) + 10
429 } else {
430 // convert digit to value, non-digits wrap to values > 36
431 (self as u32).wrapping_sub('0' as u32)
432 };
433 // FIXME(const-hack): once then_some is const fn, use it here
434 if value < radix { Some(value) } else { None }
435 }
436
437 /// Returns an iterator that yields the hexadecimal Unicode escape of a
438 /// character as `char`s.
439 ///
440 /// This will escape characters with the Rust syntax of the form
441 /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
442 ///
443 /// # Examples
444 ///
445 /// As an iterator:
446 ///
447 /// ```
448 /// for c in '❤'.escape_unicode() {
449 /// print!("{c}");
450 /// }
451 /// println!();
452 /// ```
453 ///
454 /// Using `println!` directly:
455 ///
456 /// ```
457 /// println!("{}", '❤'.escape_unicode());
458 /// ```
459 ///
460 /// Both are equivalent to:
461 ///
462 /// ```
463 /// println!("\\u{{2764}}");
464 /// ```
465 ///
466 /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
467 ///
468 /// ```
469 /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
470 /// ```
471 #[must_use = "this returns the escaped char as an iterator, \
472 without modifying the original"]
473 #[stable(feature = "rust1", since = "1.0.0")]
474 #[inline]
475 #[cfg(not(feature = "ferrocene_certified"))]
pub fn escape_unicode(self) -> EscapeUnicode {
    // All escaping state lives in the returned `EscapeUnicode` value; this
    // method only constructs it from the character.
    EscapeUnicode::new(self)
}
479
480 /// An extended version of `escape_debug` that optionally permits escaping
481 /// Extended Grapheme codepoints, single quotes, and double quotes. This
482 /// allows us to format characters like nonspacing marks better when they're
483 /// at the start of a string, and allows escaping single quotes in
484 /// characters, and double quotes in strings.
485 #[inline]
486 #[cfg(not(feature = "ferrocene_certified"))]
487 pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
488 match self {
489 '\0' => EscapeDebug::backslash(ascii::Char::Digit0),
490 '\t' => EscapeDebug::backslash(ascii::Char::SmallT),
491 '\r' => EscapeDebug::backslash(ascii::Char::SmallR),
492 '\n' => EscapeDebug::backslash(ascii::Char::SmallN),
493 '\\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus),
494 '\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
495 '\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
496 _ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
497 EscapeDebug::unicode(self)
498 }
499 _ if is_printable(self) => EscapeDebug::printable(self),
500 _ => EscapeDebug::unicode(self),
501 }
502 }
503
504 /// Returns an iterator that yields the literal escape code of a character
505 /// as `char`s.
506 ///
507 /// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations
508 /// of `str` or `char`.
509 ///
510 /// # Examples
511 ///
512 /// As an iterator:
513 ///
514 /// ```
515 /// for c in '\n'.escape_debug() {
516 /// print!("{c}");
517 /// }
518 /// println!();
519 /// ```
520 ///
521 /// Using `println!` directly:
522 ///
523 /// ```
524 /// println!("{}", '\n'.escape_debug());
525 /// ```
526 ///
527 /// Both are equivalent to:
528 ///
529 /// ```
530 /// println!("\\n");
531 /// ```
532 ///
533 /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
534 ///
535 /// ```
536 /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
537 /// ```
538 #[must_use = "this returns the escaped char as an iterator, \
539 without modifying the original"]
540 #[stable(feature = "char_escape_debug", since = "1.20.0")]
541 #[inline]
542 #[cfg(not(feature = "ferrocene_certified"))]
pub fn escape_debug(self) -> EscapeDebug {
    // Public entry point for `escape_debug_ext` with a fixed argument set.
    // NOTE(review): `ESCAPE_ALL` presumably turns on every optional escape
    // (grapheme-extended, single quote, double quote); its definition is
    // outside this view — confirm.
    self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
}
546
547 /// Returns an iterator that yields the literal escape code of a character
548 /// as `char`s.
549 ///
550 /// The default is chosen with a bias toward producing literals that are
551 /// legal in a variety of languages, including C++11 and similar C-family
552 /// languages. The exact rules are:
553 ///
554 /// * Tab is escaped as `\t`.
555 /// * Carriage return is escaped as `\r`.
556 /// * Line feed is escaped as `\n`.
557 /// * Single quote is escaped as `\'`.
558 /// * Double quote is escaped as `\"`.
559 /// * Backslash is escaped as `\\`.
560 /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
561 /// inclusive is not escaped.
562 /// * All other characters are given hexadecimal Unicode escapes; see
563 /// [`escape_unicode`].
564 ///
565 /// [`escape_unicode`]: #method.escape_unicode
566 ///
567 /// # Examples
568 ///
569 /// As an iterator:
570 ///
571 /// ```
572 /// for c in '"'.escape_default() {
573 /// print!("{c}");
574 /// }
575 /// println!();
576 /// ```
577 ///
578 /// Using `println!` directly:
579 ///
580 /// ```
581 /// println!("{}", '"'.escape_default());
582 /// ```
583 ///
584 /// Both are equivalent to:
585 ///
586 /// ```
587 /// println!("\\\"");
588 /// ```
589 ///
590 /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
591 ///
592 /// ```
593 /// assert_eq!('"'.escape_default().to_string(), "\\\"");
594 /// ```
595 #[must_use = "this returns the escaped char as an iterator, \
596 without modifying the original"]
597 #[stable(feature = "rust1", since = "1.0.0")]
598 #[inline]
599 #[cfg(not(feature = "ferrocene_certified"))]
600 pub fn escape_default(self) -> EscapeDefault {
601 match self {
602 '\t' => EscapeDefault::backslash(ascii::Char::SmallT),
603 '\r' => EscapeDefault::backslash(ascii::Char::SmallR),
604 '\n' => EscapeDefault::backslash(ascii::Char::SmallN),
605 '\\' | '\'' | '\"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
606 '\x20'..='\x7e' => EscapeDefault::printable(self.as_ascii().unwrap()),
607 _ => EscapeDefault::unicode(self),
608 }
609 }
610
611 /// Returns the number of bytes this `char` would need if encoded in UTF-8.
612 ///
613 /// That number of bytes is always between 1 and 4, inclusive.
614 ///
615 /// # Examples
616 ///
617 /// Basic usage:
618 ///
619 /// ```
620 /// let len = 'A'.len_utf8();
621 /// assert_eq!(len, 1);
622 ///
623 /// let len = 'ß'.len_utf8();
624 /// assert_eq!(len, 2);
625 ///
626 /// let len = 'ℝ'.len_utf8();
627 /// assert_eq!(len, 3);
628 ///
629 /// let len = '💣'.len_utf8();
630 /// assert_eq!(len, 4);
631 /// ```
632 ///
633 /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
634 /// would take if each code point was represented as a `char` vs in the `&str` itself:
635 ///
636 /// ```
637 /// // as chars
638 /// let eastern = '東';
639 /// let capital = '京';
640 ///
641 /// // both can be represented as three bytes
642 /// assert_eq!(3, eastern.len_utf8());
643 /// assert_eq!(3, capital.len_utf8());
644 ///
645 /// // as a &str, these two are encoded in UTF-8
646 /// let tokyo = "東京";
647 ///
648 /// let len = eastern.len_utf8() + capital.len_utf8();
649 ///
650 /// // we can see that they take six bytes total...
651 /// assert_eq!(6, tokyo.len());
652 ///
653 /// // ... just like the &str
654 /// assert_eq!(len, tokyo.len());
655 /// ```
656 #[stable(feature = "rust1", since = "1.0.0")]
657 #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
658 #[inline]
659 #[must_use]
pub const fn len_utf8(self) -> usize {
    // This calls the free function `len_utf8` (taking the code point as a
    // `u32`), not this method, so there is no recursion. The free function is
    // defined outside this view.
    len_utf8(self as u32)
}
663
664 /// Returns the number of 16-bit code units this `char` would need if
665 /// encoded in UTF-16.
666 ///
/// That number of code units is always either 1 or 2, for Unicode scalar values in
668 /// the [basic multilingual plane] or [supplementary planes] respectively.
669 ///
670 /// See the documentation for [`len_utf8()`] for more explanation of this
671 /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
672 ///
673 /// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane
674 /// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes
675 /// [`len_utf8()`]: #method.len_utf8
676 ///
677 /// # Examples
678 ///
679 /// Basic usage:
680 ///
681 /// ```
682 /// let n = 'ß'.len_utf16();
683 /// assert_eq!(n, 1);
684 ///
685 /// let len = '💣'.len_utf16();
686 /// assert_eq!(len, 2);
687 /// ```
688 #[stable(feature = "rust1", since = "1.0.0")]
689 #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
690 #[inline]
691 #[must_use]
692 #[cfg(not(feature = "ferrocene_certified"))]
pub const fn len_utf16(self) -> usize {
    // This calls the free function `len_utf16` (taking the code point as a
    // `u32`), not this method, so there is no recursion. The free function is
    // defined outside this view.
    len_utf16(self as u32)
}
696
697 /// Encodes this character as UTF-8 into the provided byte buffer,
698 /// and then returns the subslice of the buffer that contains the encoded character.
699 ///
700 /// # Panics
701 ///
702 /// Panics if the buffer is not large enough.
703 /// A buffer of length four is large enough to encode any `char`.
704 ///
705 /// # Examples
706 ///
707 /// In both of these examples, 'ß' takes two bytes to encode.
708 ///
709 /// ```
710 /// let mut b = [0; 2];
711 ///
712 /// let result = 'ß'.encode_utf8(&mut b);
713 ///
714 /// assert_eq!(result, "ß");
715 ///
716 /// assert_eq!(result.len(), 2);
717 /// ```
718 ///
719 /// A buffer that's too small:
720 ///
721 /// ```should_panic
722 /// let mut b = [0; 1];
723 ///
724 /// // this panics
725 /// 'ß'.encode_utf8(&mut b);
726 /// ```
727 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
728 #[rustc_const_stable(feature = "const_char_encode_utf8", since = "1.83.0")]
729 #[inline]
730 #[cfg(not(feature = "ferrocene_certified"))]
731 pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
732 // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
733 unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
734 }
735
736 /// Encodes this character as native endian UTF-16 into the provided `u16` buffer,
737 /// and then returns the subslice of the buffer that contains the encoded character.
738 ///
739 /// # Panics
740 ///
741 /// Panics if the buffer is not large enough.
742 /// A buffer of length 2 is large enough to encode any `char`.
743 ///
744 /// # Examples
745 ///
746 /// In both of these examples, '𝕊' takes two `u16`s to encode.
747 ///
748 /// ```
749 /// let mut b = [0; 2];
750 ///
751 /// let result = '𝕊'.encode_utf16(&mut b);
752 ///
753 /// assert_eq!(result.len(), 2);
754 /// ```
755 ///
756 /// A buffer that's too small:
757 ///
758 /// ```should_panic
759 /// let mut b = [0; 1];
760 ///
761 /// // this panics
762 /// '𝕊'.encode_utf16(&mut b);
763 /// ```
764 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
765 #[rustc_const_stable(feature = "const_char_encode_utf16", since = "1.84.0")]
766 #[inline]
767 #[cfg(not(feature = "ferrocene_certified"))]
pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
    // Forwards the code point to the raw `u32`-based encoder and returns the
    // sub-slice it produces.
    // NOTE(review): the documented panic for a too-small buffer presumably
    // originates in `encode_utf16_raw` — it is not checked in this wrapper.
    encode_utf16_raw(self as u32, dst)
}
771
772 /// Returns `true` if this `char` has the `Alphabetic` property.
773 ///
774 /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
775 /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
776 ///
777 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
778 /// [ucd]: https://www.unicode.org/reports/tr44/
779 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
780 ///
781 /// # Examples
782 ///
783 /// Basic usage:
784 ///
785 /// ```
786 /// assert!('a'.is_alphabetic());
787 /// assert!('京'.is_alphabetic());
788 ///
789 /// let c = '💝';
790 /// // love is many things, but it is not alphabetic
791 /// assert!(!c.is_alphabetic());
792 /// ```
793 #[must_use]
794 #[stable(feature = "rust1", since = "1.0.0")]
795 #[inline]
796 #[cfg(not(feature = "ferrocene_certified"))]
797 pub fn is_alphabetic(self) -> bool {
798 match self {
799 'a'..='z' | 'A'..='Z' => true,
800 c => c > '\x7f' && unicode::Alphabetic(c),
801 }
802 }
803
804 /// Returns `true` if this `char` has the `Lowercase` property.
805 ///
806 /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
807 /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
808 ///
809 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
810 /// [ucd]: https://www.unicode.org/reports/tr44/
811 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
812 ///
813 /// # Examples
814 ///
815 /// Basic usage:
816 ///
817 /// ```
818 /// assert!('a'.is_lowercase());
819 /// assert!('δ'.is_lowercase());
820 /// assert!(!'A'.is_lowercase());
821 /// assert!(!'Δ'.is_lowercase());
822 ///
823 /// // The various Chinese scripts and punctuation do not have case, and so:
824 /// assert!(!'中'.is_lowercase());
825 /// assert!(!' '.is_lowercase());
826 /// ```
827 ///
828 /// In a const context:
829 ///
830 /// ```
831 /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
832 /// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
833 /// ```
834 #[must_use]
835 #[stable(feature = "rust1", since = "1.0.0")]
836 #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
837 #[inline]
838 #[cfg(not(feature = "ferrocene_certified"))]
839 pub const fn is_lowercase(self) -> bool {
840 match self {
841 'a'..='z' => true,
842 c => c > '\x7f' && unicode::Lowercase(c),
843 }
844 }
845
846 /// Returns `true` if this `char` has the `Uppercase` property.
847 ///
848 /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
849 /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
850 ///
851 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
852 /// [ucd]: https://www.unicode.org/reports/tr44/
853 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
854 ///
855 /// # Examples
856 ///
857 /// Basic usage:
858 ///
859 /// ```
860 /// assert!(!'a'.is_uppercase());
861 /// assert!(!'δ'.is_uppercase());
862 /// assert!('A'.is_uppercase());
863 /// assert!('Δ'.is_uppercase());
864 ///
865 /// // The various Chinese scripts and punctuation do not have case, and so:
866 /// assert!(!'中'.is_uppercase());
867 /// assert!(!' '.is_uppercase());
868 /// ```
869 ///
870 /// In a const context:
871 ///
872 /// ```
873 /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
874 /// assert!(CAPITAL_DELTA_IS_UPPERCASE);
875 /// ```
876 #[must_use]
877 #[stable(feature = "rust1", since = "1.0.0")]
878 #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
879 #[inline]
880 #[cfg(not(feature = "ferrocene_certified"))]
881 pub const fn is_uppercase(self) -> bool {
882 match self {
883 'A'..='Z' => true,
884 c => c > '\x7f' && unicode::Uppercase(c),
885 }
886 }
887
888 /// Returns `true` if this `char` has the `White_Space` property.
889 ///
890 /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
891 ///
892 /// [ucd]: https://www.unicode.org/reports/tr44/
893 /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
894 ///
895 /// # Examples
896 ///
897 /// Basic usage:
898 ///
899 /// ```
900 /// assert!(' '.is_whitespace());
901 ///
902 /// // line break
903 /// assert!('\n'.is_whitespace());
904 ///
905 /// // a non-breaking space
906 /// assert!('\u{A0}'.is_whitespace());
907 ///
908 /// assert!(!'越'.is_whitespace());
909 /// ```
910 #[must_use]
911 #[stable(feature = "rust1", since = "1.0.0")]
912 #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
913 #[inline]
914 #[cfg(not(feature = "ferrocene_certified"))]
915 pub const fn is_whitespace(self) -> bool {
916 match self {
917 ' ' | '\x09'..='\x0d' => true,
918 c => c > '\x7f' && unicode::White_Space(c),
919 }
920 }
921
922 /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
923 ///
924 /// [`is_alphabetic()`]: #method.is_alphabetic
925 /// [`is_numeric()`]: #method.is_numeric
926 ///
927 /// # Examples
928 ///
929 /// Basic usage:
930 ///
931 /// ```
932 /// assert!('٣'.is_alphanumeric());
933 /// assert!('7'.is_alphanumeric());
934 /// assert!('৬'.is_alphanumeric());
935 /// assert!('¾'.is_alphanumeric());
936 /// assert!('①'.is_alphanumeric());
937 /// assert!('K'.is_alphanumeric());
938 /// assert!('و'.is_alphanumeric());
939 /// assert!('藏'.is_alphanumeric());
940 /// ```
941 #[must_use]
942 #[stable(feature = "rust1", since = "1.0.0")]
943 #[inline]
944 #[cfg(not(feature = "ferrocene_certified"))]
945 pub fn is_alphanumeric(self) -> bool {
946 if self.is_ascii() {
947 self.is_ascii_alphanumeric()
948 } else {
949 unicode::Alphabetic(self) || unicode::N(self)
950 }
951 }
952
953 /// Returns `true` if this `char` has the general category for control codes.
954 ///
955 /// Control codes (code points with the general category of `Cc`) are described in Chapter 4
956 /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
957 /// Database][ucd] [`UnicodeData.txt`].
958 ///
959 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
960 /// [ucd]: https://www.unicode.org/reports/tr44/
961 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
962 ///
963 /// # Examples
964 ///
965 /// Basic usage:
966 ///
967 /// ```
968 /// // U+009C, STRING TERMINATOR
/// assert!('\u{9c}'.is_control());
970 /// assert!(!'q'.is_control());
971 /// ```
972 #[must_use]
973 #[stable(feature = "rust1", since = "1.0.0")]
974 #[inline]
975 #[cfg(not(feature = "ferrocene_certified"))]
976 pub fn is_control(self) -> bool {
977 // According to
978 // https://www.unicode.org/policies/stability_policy.html#Property_Value,
979 // the set of codepoints in `Cc` will never change.
980 // So we can just hard-code the patterns to match against instead of using a table.
981 matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
982 }
983
984 /// Returns `true` if this `char` has the `Grapheme_Extend` property.
985 ///
986 /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
987 /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
988 /// [`DerivedCoreProperties.txt`].
989 ///
990 /// [uax29]: https://www.unicode.org/reports/tr29/
991 /// [ucd]: https://www.unicode.org/reports/tr44/
992 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
993 #[must_use]
994 #[inline]
995 #[cfg(not(feature = "ferrocene_certified"))]
996 pub(crate) fn is_grapheme_extended(self) -> bool {
997 !self.is_ascii() && unicode::Grapheme_Extend(self)
998 }
999
1000 /// Returns `true` if this `char` has the `Cased` property.
1001 ///
1002 /// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
1003 /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
1004 ///
1005 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1006 /// [ucd]: https://www.unicode.org/reports/tr44/
1007 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1008 #[must_use]
1009 #[inline]
1010 #[doc(hidden)]
1011 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1012 #[cfg(not(feature = "ferrocene_certified"))]
1013 pub fn is_cased(self) -> bool {
1014 if self.is_ascii() { self.is_ascii_alphabetic() } else { unicode::Cased(self) }
1015 }
1016
1017 /// Returns `true` if this `char` has the `Case_Ignorable` property.
1018 ///
1019 /// `Case_Ignorable` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
1020 /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
1021 ///
1022 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1023 /// [ucd]: https://www.unicode.org/reports/tr44/
1024 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1025 #[must_use]
1026 #[inline]
1027 #[doc(hidden)]
1028 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1029 #[cfg(not(feature = "ferrocene_certified"))]
1030 pub fn is_case_ignorable(self) -> bool {
1031 if self.is_ascii() {
1032 matches!(self, '\'' | '.' | ':' | '^' | '`')
1033 } else {
1034 unicode::Case_Ignorable(self)
1035 }
1036 }
1037
1038 /// Returns `true` if this `char` has one of the general categories for numbers.
1039 ///
1040 /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
1041 /// characters, and `No` for other numeric characters) are specified in the [Unicode Character
1042 /// Database][ucd] [`UnicodeData.txt`].
1043 ///
1044 /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'.
1045 /// If you want everything including characters with overlapping purposes then you might want to use
1046 /// a unicode or language-processing library that exposes the appropriate character properties instead
1047 /// of looking at the unicode categories.
1048 ///
1049 /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
1050 /// `is_ascii_digit` or `is_digit` instead.
1051 ///
1052 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1053 /// [ucd]: https://www.unicode.org/reports/tr44/
1054 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1055 ///
1056 /// # Examples
1057 ///
1058 /// Basic usage:
1059 ///
1060 /// ```
1061 /// assert!('٣'.is_numeric());
1062 /// assert!('7'.is_numeric());
1063 /// assert!('৬'.is_numeric());
1064 /// assert!('¾'.is_numeric());
1065 /// assert!('①'.is_numeric());
1066 /// assert!(!'K'.is_numeric());
1067 /// assert!(!'و'.is_numeric());
1068 /// assert!(!'藏'.is_numeric());
1069 /// assert!(!'三'.is_numeric());
1070 /// ```
1071 #[must_use]
1072 #[stable(feature = "rust1", since = "1.0.0")]
1073 #[inline]
1074 #[cfg(not(feature = "ferrocene_certified"))]
1075 pub fn is_numeric(self) -> bool {
1076 match self {
1077 '0'..='9' => true,
1078 c => c > '\x7f' && unicode::N(c),
1079 }
1080 }
1081
1082 /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
1083 /// `char`s.
1084 ///
1085 /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
1086 ///
1087 /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
1088 /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1089 ///
1090 /// [ucd]: https://www.unicode.org/reports/tr44/
1091 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1092 ///
1093 /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
1094 /// the `char`(s) given by [`SpecialCasing.txt`].
1095 ///
1096 /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1097 ///
1098 /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1099 /// is independent of context and language.
1100 ///
1101 /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1102 /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1103 ///
1104 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1105 ///
1106 /// # Examples
1107 ///
1108 /// As an iterator:
1109 ///
1110 /// ```
1111 /// for c in 'İ'.to_lowercase() {
1112 /// print!("{c}");
1113 /// }
1114 /// println!();
1115 /// ```
1116 ///
1117 /// Using `println!` directly:
1118 ///
1119 /// ```
1120 /// println!("{}", 'İ'.to_lowercase());
1121 /// ```
1122 ///
1123 /// Both are equivalent to:
1124 ///
1125 /// ```
1126 /// println!("i\u{307}");
1127 /// ```
1128 ///
1129 /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1130 ///
1131 /// ```
1132 /// assert_eq!('C'.to_lowercase().to_string(), "c");
1133 ///
1134 /// // Sometimes the result is more than one character:
1135 /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
1136 ///
1137 /// // Characters that do not have both uppercase and lowercase
1138 /// // convert into themselves.
1139 /// assert_eq!('山'.to_lowercase().to_string(), "山");
1140 /// ```
1141 #[must_use = "this returns the lowercase character as a new iterator, \
1142 without modifying the original"]
1143 #[stable(feature = "rust1", since = "1.0.0")]
1144 #[inline]
1145 #[cfg(not(feature = "ferrocene_certified"))]
1146 pub fn to_lowercase(self) -> ToLowercase {
1147 ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
1148 }
1149
1150 /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
1151 /// `char`s.
1152 ///
1153 /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
1154 ///
1155 /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
1156 /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1157 ///
1158 /// [ucd]: https://www.unicode.org/reports/tr44/
1159 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1160 ///
1161 /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
1162 /// the `char`(s) given by [`SpecialCasing.txt`].
1163 ///
1164 /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1165 ///
1166 /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1167 /// is independent of context and language.
1168 ///
1169 /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1170 /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1171 ///
1172 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1173 ///
1174 /// # Examples
1175 ///
1176 /// As an iterator:
1177 ///
1178 /// ```
1179 /// for c in 'ß'.to_uppercase() {
1180 /// print!("{c}");
1181 /// }
1182 /// println!();
1183 /// ```
1184 ///
1185 /// Using `println!` directly:
1186 ///
1187 /// ```
1188 /// println!("{}", 'ß'.to_uppercase());
1189 /// ```
1190 ///
1191 /// Both are equivalent to:
1192 ///
1193 /// ```
1194 /// println!("SS");
1195 /// ```
1196 ///
1197 /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1198 ///
1199 /// ```
1200 /// assert_eq!('c'.to_uppercase().to_string(), "C");
1201 ///
1202 /// // Sometimes the result is more than one character:
1203 /// assert_eq!('ß'.to_uppercase().to_string(), "SS");
1204 ///
1205 /// // Characters that do not have both uppercase and lowercase
1206 /// // convert into themselves.
1207 /// assert_eq!('山'.to_uppercase().to_string(), "山");
1208 /// ```
1209 ///
1210 /// # Note on locale
1211 ///
1212 /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
1213 ///
1214 /// * 'Dotless': I / ı, sometimes written ï
1215 /// * 'Dotted': İ / i
1216 ///
1217 /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1218 ///
1219 /// ```
1220 /// let upper_i = 'i'.to_uppercase().to_string();
1221 /// ```
1222 ///
1223 /// The value of `upper_i` here relies on the language of the text: if we're
1224 /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
1225 /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
1226 ///
1227 /// ```
1228 /// let upper_i = 'i'.to_uppercase().to_string();
1229 ///
1230 /// assert_eq!(upper_i, "I");
1231 /// ```
1232 ///
1233 /// holds across languages.
1234 #[must_use = "this returns the uppercase character as a new iterator, \
1235 without modifying the original"]
1236 #[stable(feature = "rust1", since = "1.0.0")]
1237 #[inline]
1238 #[cfg(not(feature = "ferrocene_certified"))]
1239 pub fn to_uppercase(self) -> ToUppercase {
1240 ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
1241 }
1242
1243 /// Checks if the value is within the ASCII range.
1244 ///
1245 /// # Examples
1246 ///
1247 /// ```
1248 /// let ascii = 'a';
1249 /// let non_ascii = '❤';
1250 ///
1251 /// assert!(ascii.is_ascii());
1252 /// assert!(!non_ascii.is_ascii());
1253 /// ```
1254 #[must_use]
1255 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1256 #[rustc_const_stable(feature = "const_char_is_ascii", since = "1.32.0")]
1257 #[rustc_diagnostic_item = "char_is_ascii"]
1258 #[inline]
1259 #[cfg(not(feature = "ferrocene_certified"))]
1260 pub const fn is_ascii(&self) -> bool {
1261 *self as u32 <= 0x7F
1262 }
1263
1264 /// Returns `Some` if the value is within the ASCII range,
1265 /// or `None` if it's not.
1266 ///
1267 /// This is preferred to [`Self::is_ascii`] when you're passing the value
1268 /// along to something else that can take [`ascii::Char`] rather than
1269 /// needing to check again for itself whether the value is in ASCII.
1270 #[must_use]
1271 #[unstable(feature = "ascii_char", issue = "110998")]
1272 #[inline]
1273 #[cfg(not(feature = "ferrocene_certified"))]
1274 pub const fn as_ascii(&self) -> Option<ascii::Char> {
1275 if self.is_ascii() {
1276 // SAFETY: Just checked that this is ASCII.
1277 Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
1278 } else {
1279 None
1280 }
1281 }
1282
1283 /// Converts this char into an [ASCII character](`ascii::Char`), without
1284 /// checking whether it is valid.
1285 ///
1286 /// # Safety
1287 ///
1288 /// This char must be within the ASCII range, or else this is UB.
1289 #[must_use]
1290 #[unstable(feature = "ascii_char", issue = "110998")]
1291 #[inline]
1292 #[cfg(not(feature = "ferrocene_certified"))]
1293 pub const unsafe fn as_ascii_unchecked(&self) -> ascii::Char {
1294 assert_unsafe_precondition!(
1295 check_library_ub,
1296 "as_ascii_unchecked requires that the char is valid ASCII",
1297 (it: &char = self) => it.is_ascii()
1298 );
1299
1300 // SAFETY: the caller promised that this char is ASCII.
1301 unsafe { ascii::Char::from_u8_unchecked(*self as u8) }
1302 }
1303
1304 /// Makes a copy of the value in its ASCII upper case equivalent.
1305 ///
1306 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1307 /// but non-ASCII letters are unchanged.
1308 ///
1309 /// To uppercase the value in-place, use [`make_ascii_uppercase()`].
1310 ///
1311 /// To uppercase ASCII characters in addition to non-ASCII characters, use
1312 /// [`to_uppercase()`].
1313 ///
1314 /// # Examples
1315 ///
1316 /// ```
1317 /// let ascii = 'a';
1318 /// let non_ascii = '❤';
1319 ///
1320 /// assert_eq!('A', ascii.to_ascii_uppercase());
1321 /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
1322 /// ```
1323 ///
1324 /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase
1325 /// [`to_uppercase()`]: #method.to_uppercase
1326 #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1327 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1328 #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1329 #[inline]
1330 #[cfg(not(feature = "ferrocene_certified"))]
1331 pub const fn to_ascii_uppercase(&self) -> char {
1332 if self.is_ascii_lowercase() {
1333 (*self as u8).ascii_change_case_unchecked() as char
1334 } else {
1335 *self
1336 }
1337 }
1338
1339 /// Makes a copy of the value in its ASCII lower case equivalent.
1340 ///
1341 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1342 /// but non-ASCII letters are unchanged.
1343 ///
1344 /// To lowercase the value in-place, use [`make_ascii_lowercase()`].
1345 ///
1346 /// To lowercase ASCII characters in addition to non-ASCII characters, use
1347 /// [`to_lowercase()`].
1348 ///
1349 /// # Examples
1350 ///
1351 /// ```
1352 /// let ascii = 'A';
1353 /// let non_ascii = '❤';
1354 ///
1355 /// assert_eq!('a', ascii.to_ascii_lowercase());
1356 /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
1357 /// ```
1358 ///
1359 /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase
1360 /// [`to_lowercase()`]: #method.to_lowercase
1361 #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1362 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1363 #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1364 #[inline]
1365 #[cfg(not(feature = "ferrocene_certified"))]
1366 pub const fn to_ascii_lowercase(&self) -> char {
1367 if self.is_ascii_uppercase() {
1368 (*self as u8).ascii_change_case_unchecked() as char
1369 } else {
1370 *self
1371 }
1372 }
1373
1374 /// Checks that two values are an ASCII case-insensitive match.
1375 ///
1376 /// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>.
1377 ///
1378 /// # Examples
1379 ///
1380 /// ```
1381 /// let upper_a = 'A';
1382 /// let lower_a = 'a';
1383 /// let lower_z = 'z';
1384 ///
1385 /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1386 /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1387 /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1388 /// ```
1389 ///
1390 /// [to_ascii_lowercase]: #method.to_ascii_lowercase
1391 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1392 #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1393 #[inline]
1394 #[cfg(not(feature = "ferrocene_certified"))]
1395 pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool {
1396 self.to_ascii_lowercase() == other.to_ascii_lowercase()
1397 }
1398
1399 /// Converts this type to its ASCII upper case equivalent in-place.
1400 ///
1401 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1402 /// but non-ASCII letters are unchanged.
1403 ///
1404 /// To return a new uppercased value without modifying the existing one, use
1405 /// [`to_ascii_uppercase()`].
1406 ///
1407 /// # Examples
1408 ///
1409 /// ```
1410 /// let mut ascii = 'a';
1411 ///
1412 /// ascii.make_ascii_uppercase();
1413 ///
1414 /// assert_eq!('A', ascii);
1415 /// ```
1416 ///
1417 /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
1418 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1419 #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
1420 #[inline]
1421 #[cfg(not(feature = "ferrocene_certified"))]
1422 pub const fn make_ascii_uppercase(&mut self) {
1423 *self = self.to_ascii_uppercase();
1424 }
1425
1426 /// Converts this type to its ASCII lower case equivalent in-place.
1427 ///
1428 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1429 /// but non-ASCII letters are unchanged.
1430 ///
1431 /// To return a new lowercased value without modifying the existing one, use
1432 /// [`to_ascii_lowercase()`].
1433 ///
1434 /// # Examples
1435 ///
1436 /// ```
1437 /// let mut ascii = 'A';
1438 ///
1439 /// ascii.make_ascii_lowercase();
1440 ///
1441 /// assert_eq!('a', ascii);
1442 /// ```
1443 ///
1444 /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
1445 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1446 #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
1447 #[inline]
1448 #[cfg(not(feature = "ferrocene_certified"))]
1449 pub const fn make_ascii_lowercase(&mut self) {
1450 *self = self.to_ascii_lowercase();
1451 }
1452
1453 /// Checks if the value is an ASCII alphabetic character:
1454 ///
1455 /// - U+0041 'A' ..= U+005A 'Z', or
1456 /// - U+0061 'a' ..= U+007A 'z'.
1457 ///
1458 /// # Examples
1459 ///
1460 /// ```
1461 /// let uppercase_a = 'A';
1462 /// let uppercase_g = 'G';
1463 /// let a = 'a';
1464 /// let g = 'g';
1465 /// let zero = '0';
1466 /// let percent = '%';
1467 /// let space = ' ';
1468 /// let lf = '\n';
1469 /// let esc = '\x1b';
1470 ///
1471 /// assert!(uppercase_a.is_ascii_alphabetic());
1472 /// assert!(uppercase_g.is_ascii_alphabetic());
1473 /// assert!(a.is_ascii_alphabetic());
1474 /// assert!(g.is_ascii_alphabetic());
1475 /// assert!(!zero.is_ascii_alphabetic());
1476 /// assert!(!percent.is_ascii_alphabetic());
1477 /// assert!(!space.is_ascii_alphabetic());
1478 /// assert!(!lf.is_ascii_alphabetic());
1479 /// assert!(!esc.is_ascii_alphabetic());
1480 /// ```
1481 #[must_use]
1482 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1483 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1484 #[inline]
1485 #[cfg(not(feature = "ferrocene_certified"))]
1486 pub const fn is_ascii_alphabetic(&self) -> bool {
1487 matches!(*self, 'A'..='Z' | 'a'..='z')
1488 }
1489
1490 /// Checks if the value is an ASCII uppercase character:
1491 /// U+0041 'A' ..= U+005A 'Z'.
1492 ///
1493 /// # Examples
1494 ///
1495 /// ```
1496 /// let uppercase_a = 'A';
1497 /// let uppercase_g = 'G';
1498 /// let a = 'a';
1499 /// let g = 'g';
1500 /// let zero = '0';
1501 /// let percent = '%';
1502 /// let space = ' ';
1503 /// let lf = '\n';
1504 /// let esc = '\x1b';
1505 ///
1506 /// assert!(uppercase_a.is_ascii_uppercase());
1507 /// assert!(uppercase_g.is_ascii_uppercase());
1508 /// assert!(!a.is_ascii_uppercase());
1509 /// assert!(!g.is_ascii_uppercase());
1510 /// assert!(!zero.is_ascii_uppercase());
1511 /// assert!(!percent.is_ascii_uppercase());
1512 /// assert!(!space.is_ascii_uppercase());
1513 /// assert!(!lf.is_ascii_uppercase());
1514 /// assert!(!esc.is_ascii_uppercase());
1515 /// ```
1516 #[must_use]
1517 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1518 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1519 #[inline]
1520 #[cfg(not(feature = "ferrocene_certified"))]
1521 pub const fn is_ascii_uppercase(&self) -> bool {
1522 matches!(*self, 'A'..='Z')
1523 }
1524
1525 /// Checks if the value is an ASCII lowercase character:
1526 /// U+0061 'a' ..= U+007A 'z'.
1527 ///
1528 /// # Examples
1529 ///
1530 /// ```
1531 /// let uppercase_a = 'A';
1532 /// let uppercase_g = 'G';
1533 /// let a = 'a';
1534 /// let g = 'g';
1535 /// let zero = '0';
1536 /// let percent = '%';
1537 /// let space = ' ';
1538 /// let lf = '\n';
1539 /// let esc = '\x1b';
1540 ///
1541 /// assert!(!uppercase_a.is_ascii_lowercase());
1542 /// assert!(!uppercase_g.is_ascii_lowercase());
1543 /// assert!(a.is_ascii_lowercase());
1544 /// assert!(g.is_ascii_lowercase());
1545 /// assert!(!zero.is_ascii_lowercase());
1546 /// assert!(!percent.is_ascii_lowercase());
1547 /// assert!(!space.is_ascii_lowercase());
1548 /// assert!(!lf.is_ascii_lowercase());
1549 /// assert!(!esc.is_ascii_lowercase());
1550 /// ```
1551 #[must_use]
1552 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1553 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1554 #[inline]
1555 #[cfg(not(feature = "ferrocene_certified"))]
1556 pub const fn is_ascii_lowercase(&self) -> bool {
1557 matches!(*self, 'a'..='z')
1558 }
1559
1560 /// Checks if the value is an ASCII alphanumeric character:
1561 ///
1562 /// - U+0041 'A' ..= U+005A 'Z', or
1563 /// - U+0061 'a' ..= U+007A 'z', or
1564 /// - U+0030 '0' ..= U+0039 '9'.
1565 ///
1566 /// # Examples
1567 ///
1568 /// ```
1569 /// let uppercase_a = 'A';
1570 /// let uppercase_g = 'G';
1571 /// let a = 'a';
1572 /// let g = 'g';
1573 /// let zero = '0';
1574 /// let percent = '%';
1575 /// let space = ' ';
1576 /// let lf = '\n';
1577 /// let esc = '\x1b';
1578 ///
1579 /// assert!(uppercase_a.is_ascii_alphanumeric());
1580 /// assert!(uppercase_g.is_ascii_alphanumeric());
1581 /// assert!(a.is_ascii_alphanumeric());
1582 /// assert!(g.is_ascii_alphanumeric());
1583 /// assert!(zero.is_ascii_alphanumeric());
1584 /// assert!(!percent.is_ascii_alphanumeric());
1585 /// assert!(!space.is_ascii_alphanumeric());
1586 /// assert!(!lf.is_ascii_alphanumeric());
1587 /// assert!(!esc.is_ascii_alphanumeric());
1588 /// ```
1589 #[must_use]
1590 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1591 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1592 #[inline]
1593 #[cfg(not(feature = "ferrocene_certified"))]
1594 pub const fn is_ascii_alphanumeric(&self) -> bool {
1595 matches!(*self, '0'..='9') | matches!(*self, 'A'..='Z') | matches!(*self, 'a'..='z')
1596 }
1597
1598 /// Checks if the value is an ASCII decimal digit:
1599 /// U+0030 '0' ..= U+0039 '9'.
1600 ///
1601 /// # Examples
1602 ///
1603 /// ```
1604 /// let uppercase_a = 'A';
1605 /// let uppercase_g = 'G';
1606 /// let a = 'a';
1607 /// let g = 'g';
1608 /// let zero = '0';
1609 /// let percent = '%';
1610 /// let space = ' ';
1611 /// let lf = '\n';
1612 /// let esc = '\x1b';
1613 ///
1614 /// assert!(!uppercase_a.is_ascii_digit());
1615 /// assert!(!uppercase_g.is_ascii_digit());
1616 /// assert!(!a.is_ascii_digit());
1617 /// assert!(!g.is_ascii_digit());
1618 /// assert!(zero.is_ascii_digit());
1619 /// assert!(!percent.is_ascii_digit());
1620 /// assert!(!space.is_ascii_digit());
1621 /// assert!(!lf.is_ascii_digit());
1622 /// assert!(!esc.is_ascii_digit());
1623 /// ```
1624 #[must_use]
1625 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1626 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1627 #[inline]
1628 #[cfg(not(feature = "ferrocene_certified"))]
1629 pub const fn is_ascii_digit(&self) -> bool {
1630 matches!(*self, '0'..='9')
1631 }
1632
1633 /// Checks if the value is an ASCII octal digit:
1634 /// U+0030 '0' ..= U+0037 '7'.
1635 ///
1636 /// # Examples
1637 ///
1638 /// ```
1639 /// #![feature(is_ascii_octdigit)]
1640 ///
1641 /// let uppercase_a = 'A';
1642 /// let a = 'a';
1643 /// let zero = '0';
1644 /// let seven = '7';
1645 /// let nine = '9';
1646 /// let percent = '%';
1647 /// let lf = '\n';
1648 ///
1649 /// assert!(!uppercase_a.is_ascii_octdigit());
1650 /// assert!(!a.is_ascii_octdigit());
1651 /// assert!(zero.is_ascii_octdigit());
1652 /// assert!(seven.is_ascii_octdigit());
1653 /// assert!(!nine.is_ascii_octdigit());
1654 /// assert!(!percent.is_ascii_octdigit());
1655 /// assert!(!lf.is_ascii_octdigit());
1656 /// ```
1657 #[must_use]
1658 #[unstable(feature = "is_ascii_octdigit", issue = "101288")]
1659 #[inline]
1660 #[cfg(not(feature = "ferrocene_certified"))]
1661 pub const fn is_ascii_octdigit(&self) -> bool {
1662 matches!(*self, '0'..='7')
1663 }
1664
1665 /// Checks if the value is an ASCII hexadecimal digit:
1666 ///
1667 /// - U+0030 '0' ..= U+0039 '9', or
1668 /// - U+0041 'A' ..= U+0046 'F', or
1669 /// - U+0061 'a' ..= U+0066 'f'.
1670 ///
1671 /// # Examples
1672 ///
1673 /// ```
1674 /// let uppercase_a = 'A';
1675 /// let uppercase_g = 'G';
1676 /// let a = 'a';
1677 /// let g = 'g';
1678 /// let zero = '0';
1679 /// let percent = '%';
1680 /// let space = ' ';
1681 /// let lf = '\n';
1682 /// let esc = '\x1b';
1683 ///
1684 /// assert!(uppercase_a.is_ascii_hexdigit());
1685 /// assert!(!uppercase_g.is_ascii_hexdigit());
1686 /// assert!(a.is_ascii_hexdigit());
1687 /// assert!(!g.is_ascii_hexdigit());
1688 /// assert!(zero.is_ascii_hexdigit());
1689 /// assert!(!percent.is_ascii_hexdigit());
1690 /// assert!(!space.is_ascii_hexdigit());
1691 /// assert!(!lf.is_ascii_hexdigit());
1692 /// assert!(!esc.is_ascii_hexdigit());
1693 /// ```
1694 #[must_use]
1695 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1696 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1697 #[inline]
1698 #[cfg(not(feature = "ferrocene_certified"))]
1699 pub const fn is_ascii_hexdigit(&self) -> bool {
1700 matches!(*self, '0'..='9') | matches!(*self, 'A'..='F') | matches!(*self, 'a'..='f')
1701 }
1702
1703 /// Checks if the value is an ASCII punctuation character:
1704 ///
1705 /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or
1706 /// - U+003A ..= U+0040 `: ; < = > ? @`, or
1707 /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or
1708 /// - U+007B ..= U+007E `{ | } ~`
1709 ///
1710 /// # Examples
1711 ///
1712 /// ```
1713 /// let uppercase_a = 'A';
1714 /// let uppercase_g = 'G';
1715 /// let a = 'a';
1716 /// let g = 'g';
1717 /// let zero = '0';
1718 /// let percent = '%';
1719 /// let space = ' ';
1720 /// let lf = '\n';
1721 /// let esc = '\x1b';
1722 ///
1723 /// assert!(!uppercase_a.is_ascii_punctuation());
1724 /// assert!(!uppercase_g.is_ascii_punctuation());
1725 /// assert!(!a.is_ascii_punctuation());
1726 /// assert!(!g.is_ascii_punctuation());
1727 /// assert!(!zero.is_ascii_punctuation());
1728 /// assert!(percent.is_ascii_punctuation());
1729 /// assert!(!space.is_ascii_punctuation());
1730 /// assert!(!lf.is_ascii_punctuation());
1731 /// assert!(!esc.is_ascii_punctuation());
1732 /// ```
1733 #[must_use]
1734 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1735 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1736 #[inline]
1737 #[cfg(not(feature = "ferrocene_certified"))]
1738 pub const fn is_ascii_punctuation(&self) -> bool {
1739 matches!(*self, '!'..='/')
1740 | matches!(*self, ':'..='@')
1741 | matches!(*self, '['..='`')
1742 | matches!(*self, '{'..='~')
1743 }
1744
1745 /// Checks if the value is an ASCII graphic character:
1746 /// U+0021 '!' ..= U+007E '~'.
1747 ///
1748 /// # Examples
1749 ///
1750 /// ```
1751 /// let uppercase_a = 'A';
1752 /// let uppercase_g = 'G';
1753 /// let a = 'a';
1754 /// let g = 'g';
1755 /// let zero = '0';
1756 /// let percent = '%';
1757 /// let space = ' ';
1758 /// let lf = '\n';
1759 /// let esc = '\x1b';
1760 ///
1761 /// assert!(uppercase_a.is_ascii_graphic());
1762 /// assert!(uppercase_g.is_ascii_graphic());
1763 /// assert!(a.is_ascii_graphic());
1764 /// assert!(g.is_ascii_graphic());
1765 /// assert!(zero.is_ascii_graphic());
1766 /// assert!(percent.is_ascii_graphic());
1767 /// assert!(!space.is_ascii_graphic());
1768 /// assert!(!lf.is_ascii_graphic());
1769 /// assert!(!esc.is_ascii_graphic());
1770 /// ```
1771 #[must_use]
1772 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1773 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1774 #[inline]
1775 #[cfg(not(feature = "ferrocene_certified"))]
1776 pub const fn is_ascii_graphic(&self) -> bool {
1777 matches!(*self, '!'..='~')
1778 }
1779
1780 /// Checks if the value is an ASCII whitespace character:
1781 /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1782 /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
1783 ///
1784 /// Rust uses the WhatWG Infra Standard's [definition of ASCII
1785 /// whitespace][infra-aw]. There are several other definitions in
1786 /// wide use. For instance, [the POSIX locale][pct] includes
1787 /// U+000B VERTICAL TAB as well as all the above characters,
1788 /// but—from the very same specification—[the default rule for
1789 /// "field splitting" in the Bourne shell][bfs] considers *only*
1790 /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1791 ///
1792 /// If you are writing a program that will process an existing
1793 /// file format, check what that format's definition of whitespace is
1794 /// before using this function.
1795 ///
1796 /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1797 /// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1798 /// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1799 ///
1800 /// # Examples
1801 ///
1802 /// ```
1803 /// let uppercase_a = 'A';
1804 /// let uppercase_g = 'G';
1805 /// let a = 'a';
1806 /// let g = 'g';
1807 /// let zero = '0';
1808 /// let percent = '%';
1809 /// let space = ' ';
1810 /// let lf = '\n';
1811 /// let esc = '\x1b';
1812 ///
1813 /// assert!(!uppercase_a.is_ascii_whitespace());
1814 /// assert!(!uppercase_g.is_ascii_whitespace());
1815 /// assert!(!a.is_ascii_whitespace());
1816 /// assert!(!g.is_ascii_whitespace());
1817 /// assert!(!zero.is_ascii_whitespace());
1818 /// assert!(!percent.is_ascii_whitespace());
1819 /// assert!(space.is_ascii_whitespace());
1820 /// assert!(lf.is_ascii_whitespace());
1821 /// assert!(!esc.is_ascii_whitespace());
1822 /// ```
1823 #[must_use]
1824 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1825 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1826 #[inline]
1827 #[cfg(not(feature = "ferrocene_certified"))]
1828 pub const fn is_ascii_whitespace(&self) -> bool {
1829 matches!(*self, '\t' | '\n' | '\x0C' | '\r' | ' ')
1830 }
1831
1832 /// Checks if the value is an ASCII control character:
1833 /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE.
1834 /// Note that most ASCII whitespace characters are control
1835 /// characters, but SPACE is not.
1836 ///
1837 /// # Examples
1838 ///
1839 /// ```
1840 /// let uppercase_a = 'A';
1841 /// let uppercase_g = 'G';
1842 /// let a = 'a';
1843 /// let g = 'g';
1844 /// let zero = '0';
1845 /// let percent = '%';
1846 /// let space = ' ';
1847 /// let lf = '\n';
1848 /// let esc = '\x1b';
1849 ///
1850 /// assert!(!uppercase_a.is_ascii_control());
1851 /// assert!(!uppercase_g.is_ascii_control());
1852 /// assert!(!a.is_ascii_control());
1853 /// assert!(!g.is_ascii_control());
1854 /// assert!(!zero.is_ascii_control());
1855 /// assert!(!percent.is_ascii_control());
1856 /// assert!(!space.is_ascii_control());
1857 /// assert!(lf.is_ascii_control());
1858 /// assert!(esc.is_ascii_control());
1859 /// ```
1860 #[must_use]
1861 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1862 #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1863 #[inline]
1864 #[cfg(not(feature = "ferrocene_certified"))]
1865 pub const fn is_ascii_control(&self) -> bool {
1866 matches!(*self, '\0'..='\x1F' | '\x7F')
1867 }
1868}
1869
/// Set of switches selecting which optional escapes are applied.
// NOTE(review): presumably consumed by `char::escape_debug_ext`; the consumer is not
// visible in this section of the file — confirm.
#[cfg(not(feature = "ferrocene_certified"))]
pub(crate) struct EscapeDebugExtArgs {
    /// Whether extended-grapheme code points are escaped.
    pub(crate) escape_grapheme_extended: bool,

    /// Whether the single quote (`'`) is escaped.
    pub(crate) escape_single_quote: bool,

    /// Whether the double quote (`"`) is escaped.
    pub(crate) escape_double_quote: bool,
}
1881
1882#[cfg(not(feature = "ferrocene_certified"))]
1883impl EscapeDebugExtArgs {
1884 pub(crate) const ESCAPE_ALL: Self = Self {
1885 escape_grapheme_extended: true,
1886 escape_single_quote: true,
1887 escape_double_quote: true,
1888 };
1889}
1890
1891#[inline]
1892#[must_use]
1893const fn len_utf8(code: u32) -> usize {
1894 match code {
1895 ..MAX_ONE_B => 1,
1896 ..MAX_TWO_B => 2,
1897 ..MAX_THREE_B => 3,
1898 _ => 4,
1899 }
1900}
1901
/// Returns how many `u16` units are needed for `code`: one when the value
/// fits in 16 bits, two otherwise.
#[inline]
#[must_use]
#[cfg(not(feature = "ferrocene_certified"))]
const fn len_utf16(code: u32) -> usize {
    match code {
        0..=0xFFFF => 1,
        _ => 2,
    }
}
1908
1909/// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,
1910/// and then returns the subslice of the buffer that contains the encoded character.
1911///
1912/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
1913/// (Creating a `char` in the surrogate range is UB.)
1914/// The result is valid [generalized UTF-8] but not valid UTF-8.
1915///
1916/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
1917///
1918/// # Panics
1919///
1920/// Panics if the buffer is not large enough.
1921/// A buffer of length four is large enough to encode any `char`.
1922#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1923#[doc(hidden)]
1924#[inline]
1925#[cfg(not(feature = "ferrocene_certified"))]
1926pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
1927 let len = len_utf8(code);
1928 if dst.len() < len {
1929 const_panic!(
1930 "encode_utf8: buffer does not have enough bytes to encode code point",
1931 "encode_utf8: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
1932 code: u32 = code,
1933 len: usize = len,
1934 dst_len: usize = dst.len(),
1935 );
1936 }
1937
1938 // SAFETY: `dst` is checked to be at least the length needed to encode the codepoint.
1939 unsafe { encode_utf8_raw_unchecked(code, dst.as_mut_ptr()) };
1940
1941 // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
1942 unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
1943}
1944
1945/// Encodes a raw `u32` value as UTF-8 into the byte buffer pointed to by `dst`.
1946///
1947/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
1948/// (Creating a `char` in the surrogate range is UB.)
1949/// The result is valid [generalized UTF-8] but not valid UTF-8.
1950///
1951/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
1952///
1953/// # Safety
1954///
1955/// The behavior is undefined if the buffer pointed to by `dst` is not
1956/// large enough to hold the encoded codepoint. A buffer of length four
1957/// is large enough to encode any `char`.
1958///
1959/// For a safe version of this function, see the [`encode_utf8_raw`] function.
1960#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1961#[doc(hidden)]
1962#[inline]
1963#[cfg(not(feature = "ferrocene_certified"))]
1964pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) {
1965 let len = len_utf8(code);
1966 // SAFETY: The caller must guarantee that the buffer pointed to by `dst`
1967 // is at least `len` bytes long.
1968 unsafe {
1969 if len == 1 {
1970 *dst = code as u8;
1971 return;
1972 }
1973
1974 let last1 = (code >> 0 & 0x3F) as u8 | TAG_CONT;
1975 let last2 = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1976 let last3 = (code >> 12 & 0x3F) as u8 | TAG_CONT;
1977 let last4 = (code >> 18 & 0x3F) as u8 | TAG_FOUR_B;
1978
1979 if len == 2 {
1980 *dst = last2 | TAG_TWO_B;
1981 *dst.add(1) = last1;
1982 return;
1983 }
1984
1985 if len == 3 {
1986 *dst = last3 | TAG_THREE_B;
1987 *dst.add(1) = last2;
1988 *dst.add(2) = last1;
1989 return;
1990 }
1991
1992 *dst = last4;
1993 *dst.add(1) = last3;
1994 *dst.add(2) = last2;
1995 *dst.add(3) = last1;
1996 }
1997}
1998
1999/// Encodes a raw `u32` value as native endian UTF-16 into the provided `u16` buffer,
2000/// and then returns the subslice of the buffer that contains the encoded character.
2001///
2002/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
2003/// (Creating a `char` in the surrogate range is UB.)
2004///
2005/// # Panics
2006///
2007/// Panics if the buffer is not large enough.
2008/// A buffer of length 2 is large enough to encode any `char`.
2009#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2010#[doc(hidden)]
2011#[inline]
2012#[cfg(not(feature = "ferrocene_certified"))]
2013pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
2014 let len = len_utf16(code);
2015 match (len, &mut *dst) {
2016 (1, [a, ..]) => {
2017 *a = code as u16;
2018 }
2019 (2, [a, b, ..]) => {
2020 code -= 0x1_0000;
2021 *a = (code >> 10) as u16 | 0xD800;
2022 *b = (code & 0x3FF) as u16 | 0xDC00;
2023 }
2024 _ => {
2025 const_panic!(
2026 "encode_utf16: buffer does not have enough bytes to encode code point",
2027 "encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
2028 code: u32 = code,
2029 len: usize = len,
2030 dst_len: usize = dst.len(),
2031 )
2032 }
2033 };
2034 // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
2035 unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
2036}