Skip to main content

core/bstr/
mod.rs

1//! The `ByteStr` type and trait implementations.
2
3mod traits;
4
5#[unstable(feature = "bstr_internals", issue = "none")]
6pub use traits::{impl_partial_eq, impl_partial_eq_n, impl_partial_eq_ord};
7
8use crate::borrow::{Borrow, BorrowMut};
9use crate::fmt;
10use crate::fmt::Alignment;
11use crate::ops::{Deref, DerefMut, DerefPure};
12
/// A wrapper for `[u8]` representing a human-readable string that's conventionally, but not
/// always, UTF-8.
///
/// Unlike `&str`, this type permits non-UTF-8 contents, making it suitable for user input,
/// non-native filenames (as `Path` only supports native filenames), and other applications that
/// need to round-trip whatever data the user provides.
///
/// For an owned, growable byte string buffer, use
/// [`ByteString`](../../std/bstr/struct.ByteString.html).
///
/// `ByteStr` implements `Deref` to `[u8]`, so all methods available on `[u8]` are available on
/// `ByteStr`.
///
/// # Representation
///
/// `ByteStr` is declared `#[repr(transparent)]` around its single `[u8]` field, which is public.
///
/// A `&ByteStr` has the same representation as a `&str`. That is, a `&ByteStr` is a wide pointer
/// which includes a pointer to some bytes and a length.
///
/// # Trait implementations
///
/// The `ByteStr` type has a number of trait implementations, and in particular, defines equality
/// and comparisons between `&ByteStr`, `&str`, and `&[u8]`, for convenience.
///
/// The `Debug` implementation for `ByteStr` shows its bytes as a normal string, with invalid UTF-8
/// presented as hex escape sequences.
///
/// The `Display` implementation behaves as if the `ByteStr` were first lossily converted to a
/// `str`, with invalid UTF-8 presented as the Unicode replacement character (�).
#[unstable(feature = "bstr", issue = "134915")]
#[repr(transparent)]
#[doc(alias = "BStr")]
#[ferrocene::prevalidated]
pub struct ByteStr(pub [u8]);
46
47impl ByteStr {
48    /// Creates a `ByteStr` slice from anything that can be converted to a byte slice.
49    ///
50    /// This is a zero-cost conversion.
51    ///
52    /// # Example
53    ///
54    /// You can create a `ByteStr` from a byte array, a byte slice or a string slice:
55    ///
56    /// ```
57    /// # #![feature(bstr)]
58    /// # use std::bstr::ByteStr;
59    /// let a = ByteStr::new(b"abc");
60    /// let b = ByteStr::new(&b"abc"[..]);
61    /// let c = ByteStr::new("abc");
62    ///
63    /// assert_eq!(a, b);
64    /// assert_eq!(a, c);
65    /// ```
66    #[inline]
67    #[unstable(feature = "bstr", issue = "134915")]
68    #[rustc_const_unstable(feature = "const_convert", issue = "143773")]
69    pub const fn new<B: ?Sized + [const] AsRef<[u8]>>(bytes: &B) -> &Self {
70        ByteStr::from_bytes(bytes.as_ref())
71    }
72
73    /// Returns the same string as `&ByteStr`.
74    ///
75    /// This method is redundant when used directly on `&ByteStr`, but
76    /// it helps dereferencing other "container" types,
77    /// for example `Box<ByteStr>` or `Arc<ByteStr>`.
78    #[inline]
79    // #[unstable(feature = "str_as_str", issue = "130366")]
80    #[unstable(feature = "bstr", issue = "134915")]
81    pub const fn as_byte_str(&self) -> &ByteStr {
82        self
83    }
84
85    /// Returns the same string as `&mut ByteStr`.
86    ///
87    /// This method is redundant when used directly on `&mut ByteStr`, but
88    /// it helps dereferencing other "container" types,
89    /// for example `Box<ByteStr>` or `MutexGuard<ByteStr>`.
90    #[inline]
91    // #[unstable(feature = "str_as_str", issue = "130366")]
92    #[unstable(feature = "bstr", issue = "134915")]
93    pub const fn as_mut_byte_str(&mut self) -> &mut ByteStr {
94        self
95    }
96
97    #[doc(hidden)]
98    #[unstable(feature = "bstr_internals", issue = "none")]
99    #[inline]
100    #[rustc_const_unstable(feature = "bstr_internals", issue = "none")]
101    #[ferrocene::prevalidated]
102    pub const fn from_bytes(slice: &[u8]) -> &Self {
103        // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`, so we can turn a reference to
104        // the wrapped type into a reference to the wrapper type.
105        unsafe { &*(slice as *const [u8] as *const Self) }
106    }
107
108    #[doc(hidden)]
109    #[unstable(feature = "bstr_internals", issue = "none")]
110    #[inline]
111    #[rustc_const_unstable(feature = "bstr_internals", issue = "none")]
112    pub const fn from_bytes_mut(slice: &mut [u8]) -> &mut Self {
113        // SAFETY: `ByteStr` is a transparent wrapper around `[u8]`, so we can turn a reference to
114        // the wrapped type into a reference to the wrapper type.
115        unsafe { &mut *(slice as *mut [u8] as *mut Self) }
116    }
117
118    #[doc(hidden)]
119    #[unstable(feature = "bstr_internals", issue = "none")]
120    #[inline]
121    #[rustc_const_unstable(feature = "bstr_internals", issue = "none")]
122    pub const fn as_bytes(&self) -> &[u8] {
123        &self.0
124    }
125
126    #[doc(hidden)]
127    #[unstable(feature = "bstr_internals", issue = "none")]
128    #[inline]
129    #[rustc_const_unstable(feature = "bstr_internals", issue = "none")]
130    pub const fn as_bytes_mut(&mut self) -> &mut [u8] {
131        &mut self.0
132    }
133}
134
135#[unstable(feature = "bstr", issue = "134915")]
136#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
137impl const Deref for ByteStr {
138    type Target = [u8];
139
140    #[inline]
141    #[ferrocene::prevalidated]
142    fn deref(&self) -> &[u8] {
143        &self.0
144    }
145}
146
147#[unstable(feature = "bstr", issue = "134915")]
148#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
149impl const DerefMut for ByteStr {
150    #[inline]
151    #[ferrocene::prevalidated]
152    fn deref_mut(&mut self) -> &mut [u8] {
153        &mut self.0
154    }
155}
156
// SAFETY: the `Deref` and `DerefMut` impls for `ByteStr` in this file do nothing but return a
// reference to the wrapped `[u8]` field.
// NOTE(review): the exact contract of `DerefPure` is defined in `crate::ops` and is not visible
// here; confirm that the above is sufficient.
#[unstable(feature = "deref_pure_trait", issue = "87121")]
unsafe impl DerefPure for ByteStr {}
159
160#[unstable(feature = "bstr", issue = "134915")]
161impl fmt::Debug for ByteStr {
162    #[ferrocene::prevalidated]
163    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
164        write!(f, "\"")?;
165        for chunk in self.utf8_chunks() {
166            for c in chunk.valid().chars() {
167                match c {
168                    '\0' => write!(f, "\\0")?,
169                    '\x01'..='\x7f' => write!(f, "{}", (c as u8).escape_ascii())?,
170                    _ => write!(f, "{}", c.escape_debug())?,
171                }
172            }
173            write!(f, "{}", chunk.invalid().escape_ascii())?;
174        }
175        write!(f, "\"")?;
176        Ok(())
177    }
178}
179
180#[unstable(feature = "bstr", issue = "134915")]
181impl fmt::Display for ByteStr {
182    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
183        fn emit(byte_str: &ByteStr, f: &mut fmt::Formatter<'_>) -> fmt::Result {
184            for chunk in byte_str.utf8_chunks() {
185                f.write_str(chunk.valid())?;
186                if !chunk.invalid().is_empty() {
187                    f.write_str("\u{FFFD}")?;
188                }
189            }
190
191            Ok(())
192        }
193
194        let requested_width = f.width().unwrap_or(0);
195        if requested_width == 0 && f.precision().is_none() {
196            // Avoid counting the characters if no truncation or padding was
197            // requested.
198            return emit(self, f);
199        }
200
201        let (truncated, actual_width) = match f.precision() {
202            // The entire string is truncated away. Weird, but ok.
203            Some(0) => (ByteStr::new(&[]), 0),
204            // Advance through string until we run out of space.
205            Some(precision) => {
206                let mut remaining_width = precision;
207                let mut chunks = self.utf8_chunks();
208                let mut current_width = 0;
209                let mut offset = 0;
210                loop {
211                    let Some(chunk) = chunks.next() else {
212                        // We reached the end of the string without running out
213                        // of space, so print the entire string.
214                        break (self, current_width);
215                    };
216
217                    let mut chars = chunk.valid().char_indices();
218                    let Err(remaining) = chars.advance_by(remaining_width) else {
219                        // We've counted off `precision` characters, so truncate
220                        // the string at the current offset.
221                        break (&self[..offset + chars.offset()], precision);
222                    };
223
224                    offset += chunk.valid().len();
225                    current_width += remaining_width - remaining.get();
226                    remaining_width = remaining.get();
227
228                    // `remaining_width` cannot be zero, there is still space
229                    // remaining. So next, count the � character emitted for
230                    // the invalid chunk (if it exists).
231                    if !chunk.invalid().is_empty() {
232                        offset += chunk.invalid().len();
233                        current_width += 1;
234                        remaining_width -= 1;
235
236                        if remaining_width == 0 {
237                            break (&self[..offset], precision);
238                        }
239                    }
240                }
241            }
242            // The string shouldn't be truncated at all, so just count the number
243            // of characters to calculate the padding.
244            None => {
245                let actual_width = self
246                    .utf8_chunks()
247                    .map(|chunk| {
248                        chunk.valid().chars().count()
249                            + if chunk.invalid().is_empty() { 0 } else { 1 }
250                    })
251                    .sum();
252                (self, actual_width)
253            }
254        };
255
256        // The width is originally stored as a 16-bit number, so this cannot fail.
257        let padding = u16::try_from(requested_width.saturating_sub(actual_width)).unwrap();
258
259        let post_padding = f.padding(padding, Alignment::Left)?;
260        emit(truncated, f)?;
261        post_padding.write(f)
262    }
263}
264
265#[unstable(feature = "bstr", issue = "134915")]
266#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
267impl const AsRef<[u8]> for ByteStr {
268    #[inline]
269    fn as_ref(&self) -> &[u8] {
270        &self.0
271    }
272}
273
274#[unstable(feature = "bstr", issue = "134915")]
275#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
276impl const AsRef<ByteStr> for ByteStr {
277    #[inline]
278    fn as_ref(&self) -> &ByteStr {
279        self
280    }
281}
282
283// `impl AsRef<ByteStr> for [u8]` omitted to avoid widespread inference failures
284
285#[unstable(feature = "bstr", issue = "134915")]
286#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
287impl const AsRef<ByteStr> for str {
288    #[inline]
289    fn as_ref(&self) -> &ByteStr {
290        ByteStr::new(self)
291    }
292}
293
294#[unstable(feature = "bstr", issue = "134915")]
295#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
296impl const AsMut<[u8]> for ByteStr {
297    #[inline]
298    fn as_mut(&mut self) -> &mut [u8] {
299        &mut self.0
300    }
301}
302
303// `impl AsMut<ByteStr> for [u8]` omitted to avoid widespread inference failures
304
305// `impl Borrow<ByteStr> for [u8]` omitted to avoid widespread inference failures
306
307// `impl Borrow<ByteStr> for str` omitted to avoid widespread inference failures
308
309#[unstable(feature = "bstr", issue = "134915")]
310#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
311impl const Borrow<[u8]> for ByteStr {
312    #[inline]
313    fn borrow(&self) -> &[u8] {
314        &self.0
315    }
316}
317
318// `impl BorrowMut<ByteStr> for [u8]` omitted to avoid widespread inference failures
319
320#[unstable(feature = "bstr", issue = "134915")]
321#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
322impl const BorrowMut<[u8]> for ByteStr {
323    #[inline]
324    fn borrow_mut(&mut self) -> &mut [u8] {
325        &mut self.0
326    }
327}
328
329#[unstable(feature = "bstr", issue = "134915")]
330impl<'a> Default for &'a ByteStr {
331    fn default() -> Self {
332        ByteStr::from_bytes(b"")
333    }
334}
335
336#[unstable(feature = "bstr", issue = "134915")]
337impl<'a> Default for &'a mut ByteStr {
338    fn default() -> Self {
339        ByteStr::from_bytes_mut(&mut [])
340    }
341}
342
343// Omitted due to inference failures
344//
345// #[unstable(feature = "bstr", issue = "134915")]
346// impl<'a, const N: usize> From<&'a [u8; N]> for &'a ByteStr {
347//     #[inline]
348//     fn from(s: &'a [u8; N]) -> Self {
349//         ByteStr::from_bytes(s)
350//     }
351// }
352//
353// #[unstable(feature = "bstr", issue = "134915")]
354// impl<'a> From<&'a [u8]> for &'a ByteStr {
355//     #[inline]
356//     fn from(s: &'a [u8]) -> Self {
357//         ByteStr::from_bytes(s)
358//     }
359// }
360
361// Omitted due to slice-from-array-issue-113238:
362//
363// #[unstable(feature = "bstr", issue = "134915")]
364// impl<'a> From<&'a ByteStr> for &'a [u8] {
365//     #[inline]
366//     fn from(s: &'a ByteStr) -> Self {
367//         &s.0
368//     }
369// }
370//
371// #[unstable(feature = "bstr", issue = "134915")]
372// impl<'a> From<&'a mut ByteStr> for &'a mut [u8] {
373//     #[inline]
374//     fn from(s: &'a mut ByteStr) -> Self {
375//         &mut s.0
376//     }
377// }
378
379// Omitted due to inference failures
380//
381// #[unstable(feature = "bstr", issue = "134915")]
382// impl<'a> From<&'a str> for &'a ByteStr {
383//     #[inline]
384//     fn from(s: &'a str) -> Self {
385//         ByteStr::from_bytes(s.as_bytes())
386//     }
387// }
388
389#[unstable(feature = "bstr", issue = "134915")]
390#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
391impl<'a> const TryFrom<&'a ByteStr> for &'a str {
392    type Error = crate::str::Utf8Error;
393
394    #[inline]
395    fn try_from(s: &'a ByteStr) -> Result<Self, Self::Error> {
396        crate::str::from_utf8(&s.0)
397    }
398}
399
400#[unstable(feature = "bstr", issue = "134915")]
401#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
402impl<'a> const TryFrom<&'a mut ByteStr> for &'a mut str {
403    type Error = crate::str::Utf8Error;
404
405    #[inline]
406    fn try_from(s: &'a mut ByteStr) -> Result<Self, Self::Error> {
407        crate::str::from_utf8_mut(&mut s.0)
408    }
409}