std/sys/os_str/
bytes.rs

1//! The underlying OsString/OsStr implementation on Unix and many other
2//! systems: just a `Vec<u8>`/`[u8]`.
3
4use core::clone::CloneToUninit;
5
6use crate::borrow::Cow;
7use crate::collections::TryReserveError;
8use crate::fmt::Write;
9use crate::rc::Rc;
10use crate::sync::Arc;
11use crate::sys_common::{AsInner, FromInner, IntoInner};
12use crate::{fmt, mem, str};
13
14#[cfg(test)]
15mod tests;
16
/// Owned OS string storage for platforms whose native encoding is plain bytes.
///
/// This is a thin wrapper around a `Vec<u8>`; the type itself places no
/// constraint on the bytes it holds.
#[repr(transparent)]
#[derive(Hash)]
pub struct Buf {
    /// The encoded bytes backing this buffer.
    pub inner: Vec<u8>,
}
22
/// Borrowed OS string data: an unsized, transparent wrapper around `[u8]`.
///
/// Because the wrapper is `#[repr(transparent)]`, references and smart
/// pointers to `[u8]` can be reinterpreted as pointing at a `Slice`.
#[repr(transparent)]
pub struct Slice {
    /// The encoded bytes viewed by this slice.
    pub inner: [u8],
}
27
28impl IntoInner<Vec<u8>> for Buf {
29    fn into_inner(self) -> Vec<u8> {
30        self.inner
31    }
32}
33
34impl FromInner<Vec<u8>> for Buf {
35    fn from_inner(inner: Vec<u8>) -> Self {
36        Buf { inner }
37    }
38}
39
40impl AsInner<[u8]> for Buf {
41    #[inline]
42    fn as_inner(&self) -> &[u8] {
43        &self.inner
44    }
45}
46
47impl fmt::Debug for Buf {
48    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49        fmt::Debug::fmt(self.as_slice(), f)
50    }
51}
52
53impl fmt::Display for Buf {
54    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55        fmt::Display::fmt(self.as_slice(), f)
56    }
57}
58
59impl fmt::Debug for Slice {
60    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61        fmt::Debug::fmt(&self.inner.utf8_chunks().debug(), f)
62    }
63}
64
65impl fmt::Display for Slice {
66    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
67        // If we're the empty string then our iterator won't actually yield
68        // anything, so perform the formatting manually
69        if self.inner.is_empty() {
70            return "".fmt(f);
71        }
72
73        for chunk in self.inner.utf8_chunks() {
74            let valid = chunk.valid();
75            // If we successfully decoded the whole chunk as a valid string then
76            // we can return a direct formatting of the string which will also
77            // respect various formatting flags if possible.
78            if chunk.invalid().is_empty() {
79                return valid.fmt(f);
80            }
81
82            f.write_str(valid)?;
83            f.write_char(char::REPLACEMENT_CHARACTER)?;
84        }
85        Ok(())
86    }
87}
88
89impl Clone for Buf {
90    #[inline]
91    fn clone(&self) -> Self {
92        Buf { inner: self.inner.clone() }
93    }
94
95    #[inline]
96    fn clone_from(&mut self, source: &Self) {
97        self.inner.clone_from(&source.inner)
98    }
99}
100
101impl Buf {
102    #[inline]
103    pub fn into_encoded_bytes(self) -> Vec<u8> {
104        self.inner
105    }
106
107    #[inline]
108    pub unsafe fn from_encoded_bytes_unchecked(s: Vec<u8>) -> Self {
109        Self { inner: s }
110    }
111
112    #[inline]
113    pub fn into_string(self) -> Result<String, Buf> {
114        String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() })
115    }
116
117    #[inline]
118    pub const fn from_string(s: String) -> Buf {
119        Buf { inner: s.into_bytes() }
120    }
121
122    #[inline]
123    pub fn with_capacity(capacity: usize) -> Buf {
124        Buf { inner: Vec::with_capacity(capacity) }
125    }
126
127    #[inline]
128    pub fn clear(&mut self) {
129        self.inner.clear()
130    }
131
132    #[inline]
133    pub fn capacity(&self) -> usize {
134        self.inner.capacity()
135    }
136
137    #[inline]
138    pub fn push_slice(&mut self, s: &Slice) {
139        self.inner.extend_from_slice(&s.inner)
140    }
141
142    #[inline]
143    pub fn push_str(&mut self, s: &str) {
144        self.inner.extend_from_slice(s.as_bytes());
145    }
146
147    #[inline]
148    pub fn reserve(&mut self, additional: usize) {
149        self.inner.reserve(additional)
150    }
151
152    #[inline]
153    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
154        self.inner.try_reserve(additional)
155    }
156
157    #[inline]
158    pub fn reserve_exact(&mut self, additional: usize) {
159        self.inner.reserve_exact(additional)
160    }
161
162    #[inline]
163    pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
164        self.inner.try_reserve_exact(additional)
165    }
166
167    #[inline]
168    pub fn shrink_to_fit(&mut self) {
169        self.inner.shrink_to_fit()
170    }
171
172    #[inline]
173    pub fn shrink_to(&mut self, min_capacity: usize) {
174        self.inner.shrink_to(min_capacity)
175    }
176
177    #[inline]
178    pub fn as_slice(&self) -> &Slice {
179        // SAFETY: Slice is just a wrapper for [u8],
180        // and self.inner.as_slice() returns &[u8].
181        // Therefore, transmuting &[u8] to &Slice is safe.
182        unsafe { mem::transmute(self.inner.as_slice()) }
183    }
184
185    #[inline]
186    pub fn as_mut_slice(&mut self) -> &mut Slice {
187        // SAFETY: Slice is just a wrapper for [u8],
188        // and self.inner.as_mut_slice() returns &mut [u8].
189        // Therefore, transmuting &mut [u8] to &mut Slice is safe.
190        unsafe { mem::transmute(self.inner.as_mut_slice()) }
191    }
192
193    #[inline]
194    pub fn leak<'a>(self) -> &'a mut Slice {
195        unsafe { mem::transmute(self.inner.leak()) }
196    }
197
198    #[inline]
199    pub fn into_box(self) -> Box<Slice> {
200        unsafe { mem::transmute(self.inner.into_boxed_slice()) }
201    }
202
203    #[inline]
204    pub fn from_box(boxed: Box<Slice>) -> Buf {
205        let inner: Box<[u8]> = unsafe { mem::transmute(boxed) };
206        Buf { inner: inner.into_vec() }
207    }
208
209    #[inline]
210    pub fn into_arc(&self) -> Arc<Slice> {
211        self.as_slice().into_arc()
212    }
213
214    #[inline]
215    pub fn into_rc(&self) -> Rc<Slice> {
216        self.as_slice().into_rc()
217    }
218
219    /// Provides plumbing to `Vec::truncate` without giving full mutable access
220    /// to the `Vec`.
221    ///
222    /// # Safety
223    ///
224    /// The length must be at an `OsStr` boundary, according to
225    /// `Slice::check_public_boundary`.
226    #[inline]
227    pub unsafe fn truncate_unchecked(&mut self, len: usize) {
228        self.inner.truncate(len);
229    }
230
231    /// Provides plumbing to `Vec::extend_from_slice` without giving full
232    /// mutable access to the `Vec`.
233    ///
234    /// # Safety
235    ///
236    /// The slice must be valid for the platform encoding (as described in
237    /// `OsStr::from_encoded_bytes_unchecked`). This encoding has no safety
238    /// requirements.
239    #[inline]
240    pub unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) {
241        self.inner.extend_from_slice(other);
242    }
243}
244
245impl Slice {
246    #[inline]
247    pub fn as_encoded_bytes(&self) -> &[u8] {
248        &self.inner
249    }
250
251    #[inline]
252    pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice {
253        unsafe { mem::transmute(s) }
254    }
255
256    #[track_caller]
257    #[inline]
258    pub fn check_public_boundary(&self, index: usize) {
259        if index == 0 || index == self.inner.len() {
260            return;
261        }
262        if index < self.inner.len()
263            && (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii())
264        {
265            return;
266        }
267
268        slow_path(&self.inner, index);
269
270        /// We're betting that typical splits will involve an ASCII character.
271        ///
272        /// Putting the expensive checks in a separate function generates notably
273        /// better assembly.
274        #[track_caller]
275        #[inline(never)]
276        fn slow_path(bytes: &[u8], index: usize) {
277            let (before, after) = bytes.split_at(index);
278
279            // UTF-8 takes at most 4 bytes per codepoint, so we don't
280            // need to check more than that.
281            let after = after.get(..4).unwrap_or(after);
282            match str::from_utf8(after) {
283                Ok(_) => return,
284                Err(err) if err.valid_up_to() != 0 => return,
285                Err(_) => (),
286            }
287
288            for len in 2..=4.min(index) {
289                let before = &before[index - len..];
290                if str::from_utf8(before).is_ok() {
291                    return;
292                }
293            }
294
295            panic!("byte index {index} is not an OsStr boundary");
296        }
297    }
298
299    #[inline]
300    pub fn from_str(s: &str) -> &Slice {
301        unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) }
302    }
303
304    #[inline]
305    pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> {
306        str::from_utf8(&self.inner)
307    }
308
309    #[inline]
310    pub fn to_string_lossy(&self) -> Cow<'_, str> {
311        String::from_utf8_lossy(&self.inner)
312    }
313
314    #[inline]
315    pub fn to_owned(&self) -> Buf {
316        Buf { inner: self.inner.to_vec() }
317    }
318
319    #[inline]
320    pub fn clone_into(&self, buf: &mut Buf) {
321        self.inner.clone_into(&mut buf.inner)
322    }
323
324    #[inline]
325    pub fn into_box(&self) -> Box<Slice> {
326        let boxed: Box<[u8]> = self.inner.into();
327        unsafe { mem::transmute(boxed) }
328    }
329
330    #[inline]
331    pub fn empty_box() -> Box<Slice> {
332        let boxed: Box<[u8]> = Default::default();
333        unsafe { mem::transmute(boxed) }
334    }
335
336    #[inline]
337    pub fn into_arc(&self) -> Arc<Slice> {
338        let arc: Arc<[u8]> = Arc::from(&self.inner);
339        unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Slice) }
340    }
341
342    #[inline]
343    pub fn into_rc(&self) -> Rc<Slice> {
344        let rc: Rc<[u8]> = Rc::from(&self.inner);
345        unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) }
346    }
347
348    #[inline]
349    pub fn make_ascii_lowercase(&mut self) {
350        self.inner.make_ascii_lowercase()
351    }
352
353    #[inline]
354    pub fn make_ascii_uppercase(&mut self) {
355        self.inner.make_ascii_uppercase()
356    }
357
358    #[inline]
359    pub fn to_ascii_lowercase(&self) -> Buf {
360        Buf { inner: self.inner.to_ascii_lowercase() }
361    }
362
363    #[inline]
364    pub fn to_ascii_uppercase(&self) -> Buf {
365        Buf { inner: self.inner.to_ascii_uppercase() }
366    }
367
368    #[inline]
369    pub fn is_ascii(&self) -> bool {
370        self.inner.is_ascii()
371    }
372
373    #[inline]
374    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
375        self.inner.eq_ignore_ascii_case(&other.inner)
376    }
377}
378
379#[unstable(feature = "clone_to_uninit", issue = "126799")]
380unsafe impl CloneToUninit for Slice {
381    #[inline]
382    #[cfg_attr(debug_assertions, track_caller)]
383    unsafe fn clone_to_uninit(&self, dst: *mut u8) {
384        // SAFETY: we're just a transparent wrapper around [u8]
385        unsafe { self.inner.clone_to_uninit(dst) }
386    }
387}