std/sys/pal/unix/
thread.rs

1use crate::ffi::CStr;
2use crate::mem::{self, ManuallyDrop};
3use crate::num::NonZero;
4#[cfg(all(target_os = "linux", target_env = "gnu"))]
5use crate::sys::weak::dlsym;
6#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))]
7use crate::sys::weak::weak;
8use crate::sys::{os, stack_overflow};
9use crate::time::{Duration, Instant};
10use crate::{cmp, io, ptr};
// Default stack size (in bytes) for spawned threads when the caller does not
// request a specific size. Most Unix targets get 2 MiB.
#[cfg(not(any(
    target_os = "l4re",
    target_os = "vxworks",
    target_os = "espidf",
    target_os = "nuttx"
)))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 2 * 1024 * 1024;
// Smaller default on L4Re.
#[cfg(target_os = "l4re")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024;
// Smaller default on VxWorks.
#[cfg(target_os = "vxworks")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024;
#[cfg(any(target_os = "espidf", target_os = "nuttx"))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF/NuttX menuconfig system should be used
24
/// Handle to a native POSIX thread, wrapping its `pthread_t` identifier.
/// Dropping the handle detaches the thread; `join` consumes it instead.
pub struct Thread {
    id: libc::pthread_t,
}

// Some platforms may have pthread_t as a pointer in which case we still want
// a thread to be Send/Sync
unsafe impl Send for Thread {}
unsafe impl Sync for Thread {}
33
impl Thread {
    /// Creates and starts a new OS thread running `p`, with a stack of at
    /// least `stack` bytes (clamped up to the platform minimum).
    ///
    /// # Safety
    /// See `thread::Builder::spawn_unchecked` for the caller's obligations.
    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
    pub unsafe fn new(stack: usize, p: Box<dyn FnOnce()>) -> io::Result<Thread> {
        // Double-box so the wide `Box<dyn FnOnce()>` can travel through
        // pthread_create's single thin `void*` argument.
        let p = Box::into_raw(Box::new(p));
        let mut native: libc::pthread_t = mem::zeroed();
        let mut attr: mem::MaybeUninit<libc::pthread_attr_t> = mem::MaybeUninit::uninit();
        assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);

        #[cfg(any(target_os = "espidf", target_os = "nuttx"))]
        if stack > 0 {
            // Only set the stack if a non-zero value is passed
            // 0 is used as an indication that the default stack size configured in the ESP-IDF/NuttX menuconfig system should be used
            assert_eq!(
                libc::pthread_attr_setstacksize(
                    attr.as_mut_ptr(),
                    cmp::max(stack, min_stack_size(attr.as_ptr()))
                ),
                0
            );
        }

        #[cfg(not(any(target_os = "espidf", target_os = "nuttx")))]
        {
            let stack_size = cmp::max(stack, min_stack_size(attr.as_ptr()));

            match libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) {
                0 => {}
                n => {
                    assert_eq!(n, libc::EINVAL);
                    // EINVAL means |stack_size| is either too small or not a
                    // multiple of the system page size. Because it's definitely
                    // >= PTHREAD_STACK_MIN, it must be an alignment issue.
                    // Round up to the nearest page and try again.
                    // (The mask below equals !(page_size - 1) in two's complement.)
                    let page_size = os::page_size();
                    let stack_size =
                        (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1);
                    assert_eq!(libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size), 0);
                }
            };
        }

        let ret = libc::pthread_create(&mut native, attr.as_ptr(), thread_start, p as *mut _);
        // Note: if the thread creation fails and this assert fails, then p will
        // be leaked. However, an alternative design could cause double-free
        // which is clearly worse.
        assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);

        return if ret != 0 {
            // The thread failed to start and as a result p was not consumed. Therefore, it is
            // safe to reconstruct the box so that it gets deallocated.
            drop(Box::from_raw(p));
            Err(io::Error::from_raw_os_error(ret))
        } else {
            Ok(Thread { id: native })
        };

        // Entry point handed to pthread_create; reclaims ownership of the
        // boxed closure and runs it on the new thread.
        extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void {
            unsafe {
                // Next, set up our stack overflow handler which may get triggered if we run
                // out of stack.
                let _handler = stack_overflow::Handler::new();
                // Finally, let's run some code.
                Box::from_raw(main as *mut Box<dyn FnOnce()>)();
            }
            ptr::null_mut()
        }
    }

    /// Cooperatively yields the rest of the current time slice to the scheduler.
    pub fn yield_now() {
        let ret = unsafe { libc::sched_yield() };
        debug_assert_eq!(ret, 0);
    }

    /// Sets the current thread's name via `prctl(PR_SET_NAME)`.
    #[cfg(target_os = "android")]
    pub fn set_name(name: &CStr) {
        const PR_SET_NAME: libc::c_int = 15;
        unsafe {
            let res = libc::prctl(
                PR_SET_NAME,
                name.as_ptr(),
                0 as libc::c_ulong,
                0 as libc::c_ulong,
                0 as libc::c_ulong,
            );
            // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
            debug_assert_eq!(res, 0);
        }
    }

    /// Sets the current thread's name via `pthread_setname_np`, truncating it
    /// first on platforms that enforce a length limit.
    #[cfg(any(
        target_os = "linux",
        target_os = "freebsd",
        target_os = "dragonfly",
        target_os = "nuttx",
        target_os = "cygwin"
    ))]
    pub fn set_name(name: &CStr) {
        unsafe {
            cfg_if::cfg_if! {
                if #[cfg(any(target_os = "linux", target_os = "cygwin"))] {
                    // Linux and Cygwin limits the allowed length of the name.
                    const TASK_COMM_LEN: usize = 16;
                    let name = truncate_cstr::<{ TASK_COMM_LEN }>(name);
                } else {
                    // FreeBSD, DragonFly BSD and NuttX do not enforce length limits.
                }
            };
            // Available since glibc 2.12, musl 1.1.16, and uClibc 1.0.20 for Linux,
            // FreeBSD 12.2 and 13.0, and DragonFly BSD 6.0.
            let res = libc::pthread_setname_np(libc::pthread_self(), name.as_ptr());
            // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
            debug_assert_eq!(res, 0);
        }
    }

    /// Sets the current thread's name via OpenBSD's `pthread_set_name_np`.
    #[cfg(target_os = "openbsd")]
    pub fn set_name(name: &CStr) {
        unsafe {
            libc::pthread_set_name_np(libc::pthread_self(), name.as_ptr());
        }
    }

    /// Sets the current thread's name; Apple's `pthread_setname_np` only
    /// operates on the calling thread and caps the name length.
    #[cfg(target_vendor = "apple")]
    pub fn set_name(name: &CStr) {
        unsafe {
            let name = truncate_cstr::<{ libc::MAXTHREADNAMESIZE }>(name);
            let res = libc::pthread_setname_np(name.as_ptr());
            // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
            debug_assert_eq!(res, 0);
        }
    }

    /// Sets the current thread's name; NetBSD's `pthread_setname_np` takes a
    /// printf-style format plus argument, so pass the name through "%s" to
    /// avoid interpreting any '%' in the name itself.
    #[cfg(target_os = "netbsd")]
    pub fn set_name(name: &CStr) {
        unsafe {
            let res = libc::pthread_setname_np(
                libc::pthread_self(),
                c"%s".as_ptr(),
                name.as_ptr() as *mut libc::c_void,
            );
            debug_assert_eq!(res, 0);
        }
    }

    /// Sets the current thread's name if `pthread_setname_np` is available at
    /// runtime (resolved as a weak symbol); silently does nothing otherwise.
    #[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto"))]
    pub fn set_name(name: &CStr) {
        weak!(
            fn pthread_setname_np(
                thread: libc::pthread_t,
                name: *const libc::c_char,
            ) -> libc::c_int;
        );

        if let Some(f) = pthread_setname_np.get() {
            #[cfg(target_os = "nto")]
            const THREAD_NAME_MAX: usize = libc::_NTO_THREAD_NAME_MAX as usize;
            #[cfg(any(target_os = "solaris", target_os = "illumos"))]
            const THREAD_NAME_MAX: usize = 32;

            let name = truncate_cstr::<{ THREAD_NAME_MAX }>(name);
            let res = unsafe { f(libc::pthread_self(), name.as_ptr()) };
            debug_assert_eq!(res, 0);
        }
    }

    /// Sets the current thread's name through Zircon's object-property API.
    #[cfg(target_os = "fuchsia")]
    pub fn set_name(name: &CStr) {
        use super::fuchsia::*;
        unsafe {
            zx_object_set_property(
                zx_thread_self(),
                ZX_PROP_NAME,
                name.as_ptr() as *const libc::c_void,
                name.to_bytes().len(),
            );
        }
    }

    /// Sets the current thread's name via Haiku's `rename_thread`.
    #[cfg(target_os = "haiku")]
    pub fn set_name(name: &CStr) {
        unsafe {
            // find_thread(NULL) returns the id of the calling thread.
            let thread_self = libc::find_thread(ptr::null_mut());
            let res = libc::rename_thread(thread_self, name.as_ptr());
            // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
            debug_assert_eq!(res, libc::B_OK);
        }
    }

    /// Sets the current task's name via VxWorks' `taskNameSet`, truncated to
    /// the platform's rename-length limit.
    #[cfg(target_os = "vxworks")]
    pub fn set_name(name: &CStr) {
        let mut name = truncate_cstr::<{ (libc::VX_TASK_RENAME_LENGTH - 1) as usize }>(name);
        let res = unsafe { libc::taskNameSet(libc::taskIdSelf(), name.as_mut_ptr()) };
        debug_assert_eq!(res, libc::OK);
    }

    /// No-op: these targets provide no API for naming a thread.
    #[cfg(any(
        target_env = "newlib",
        target_os = "l4re",
        target_os = "emscripten",
        target_os = "redox",
        target_os = "hurd",
        target_os = "aix",
    ))]
    pub fn set_name(_name: &CStr) {
        // Newlib and Emscripten have no way to set a thread name.
    }

    /// Blocks the current thread for at least `dur`, restarting `nanosleep`
    /// whenever it is interrupted by a signal.
    #[cfg(not(target_os = "espidf"))]
    pub fn sleep(dur: Duration) {
        let mut secs = dur.as_secs();
        let mut nsecs = dur.subsec_nanos() as _;

        // If we're awoken with a signal then the return value will be -1 and
        // nanosleep will fill in `ts` with the remaining time.
        unsafe {
            while secs > 0 || nsecs > 0 {
                // Clamp to time_t::MAX per iteration; the loop makes up the rest.
                let mut ts = libc::timespec {
                    tv_sec: cmp::min(libc::time_t::MAX as u64, secs) as libc::time_t,
                    tv_nsec: nsecs,
                };
                secs -= ts.tv_sec as u64;
                let ts_ptr = &raw mut ts;
                if libc::nanosleep(ts_ptr, ts_ptr) == -1 {
                    assert_eq!(os::errno(), libc::EINTR);
                    // Add the unslept remainder back and retry.
                    secs += ts.tv_sec as u64;
                    nsecs = ts.tv_nsec;
                } else {
                    nsecs = 0;
                }
            }
        }
    }

    /// Blocks the current thread for at least `dur` using `usleep`.
    #[cfg(target_os = "espidf")]
    pub fn sleep(dur: Duration) {
        // ESP-IDF does not have `nanosleep`, so we use `usleep` instead.
        // As per the documentation of `usleep`, it is expected to support
        // sleep times as big as at least up to 1 second.
        //
        // ESP-IDF does support almost up to `u32::MAX`, but due to a potential integer overflow in its
        // `usleep` implementation
        // (https://github.com/espressif/esp-idf/blob/d7ca8b94c852052e3bc33292287ef4dd62c9eeb1/components/newlib/time.c#L210),
        // we limit the sleep time to the maximum one that would not cause the underlying `usleep` implementation to overflow
        // (`portTICK_PERIOD_MS` can be anything between 1 to 1000, and is 10 by default).
        const MAX_MICROS: u32 = u32::MAX - 1_000_000 - 1;

        // Add any nanoseconds smaller than a microsecond as an extra microsecond
        // so as to comply with the `std::thread::sleep` contract which mandates
        // implementations to sleep for _at least_ the provided `dur`.
        // We can't overflow `micros` as it is a `u128`, while `Duration` is a pair of
        // (`u64` secs, `u32` nanos), where the nanos are strictly smaller than 1 second
        // (i.e. < 1_000_000_000)
        let mut micros = dur.as_micros() + if dur.subsec_nanos() % 1_000 > 0 { 1 } else { 0 };

        while micros > 0 {
            let st = if micros > MAX_MICROS as u128 { MAX_MICROS } else { micros as u32 };
            unsafe {
                libc::usleep(st);
            }

            micros -= st as u128;
        }
    }

    /// Blocks the current thread until `deadline`, using an absolute-time
    /// `clock_nanosleep` so signal interruptions cannot accumulate drift.
    // Any unix that has clock_nanosleep
    // If this list changes update the MIRI clock_nanosleep shim
    #[cfg(any(
        target_os = "freebsd",
        target_os = "netbsd",
        target_os = "linux",
        target_os = "android",
        target_os = "solaris",
        target_os = "illumos",
        target_os = "dragonfly",
        target_os = "hurd",
        target_os = "fuchsia",
        target_os = "vxworks",
    ))]
    pub fn sleep_until(deadline: Instant) {
        let Some(ts) = deadline.into_inner().into_timespec().to_timespec() else {
            // The deadline is further in the future then can be passed to
            // clock_nanosleep. We have to use Self::sleep instead. This might
            // happen on 32 bit platforms, especially closer to 2038.
            let now = Instant::now();
            if let Some(delay) = deadline.checked_duration_since(now) {
                Self::sleep(delay);
            }
            return;
        };

        unsafe {
            // When we get interrupted (res = EINTR) call clock_nanosleep again
            loop {
                let res = libc::clock_nanosleep(
                    super::time::Instant::CLOCK_ID,
                    libc::TIMER_ABSTIME,
                    &ts,
                    core::ptr::null_mut(), // not required with TIMER_ABSTIME
                );

                if res == 0 {
                    break;
                } else {
                    assert_eq!(
                        res,
                        libc::EINTR,
                        "timespec is in range,
                         clockid is valid and kernel should support it"
                    );
                }
            }
        }
    }

    /// Fallback `sleep_until` for targets without `clock_nanosleep`:
    /// computes the remaining duration and delegates to `Self::sleep`.
    // Any unix that does not have clock_nanosleep
    #[cfg(not(any(
        target_os = "freebsd",
        target_os = "netbsd",
        target_os = "linux",
        target_os = "android",
        target_os = "solaris",
        target_os = "illumos",
        target_os = "dragonfly",
        target_os = "hurd",
        target_os = "fuchsia",
        target_os = "vxworks",
    )))]
    pub fn sleep_until(deadline: Instant) {
        let now = Instant::now();
        if let Some(delay) = deadline.checked_duration_since(now) {
            Self::sleep(delay);
        }
    }

    /// Waits for the thread to finish, consuming the handle so the `Drop`
    /// impl does not additionally detach it. Panics if `pthread_join` fails.
    pub fn join(self) {
        let id = self.into_id();
        let ret = unsafe { libc::pthread_join(id, ptr::null_mut()) };
        assert!(ret == 0, "failed to join thread: {}", io::Error::from_raw_os_error(ret));
    }

    /// Returns the underlying `pthread_t` without giving up ownership.
    pub fn id(&self) -> libc::pthread_t {
        self.id
    }

    /// Returns the underlying `pthread_t`, consuming the handle without
    /// running `Drop` (i.e. without detaching the thread).
    pub fn into_id(self) -> libc::pthread_t {
        ManuallyDrop::new(self).id
    }
}
383
impl Drop for Thread {
    /// Detaches the thread when the handle is dropped without being joined,
    /// letting the OS reclaim its resources once it exits.
    fn drop(&mut self) {
        let ret = unsafe { libc::pthread_detach(self.id) };
        debug_assert_eq!(ret, 0);
    }
}
390
391#[cfg(any(
392    target_os = "linux",
393    target_os = "nto",
394    target_os = "solaris",
395    target_os = "illumos",
396    target_os = "vxworks",
397    target_os = "cygwin",
398    target_vendor = "apple",
399))]
400fn truncate_cstr<const MAX_WITH_NUL: usize>(cstr: &CStr) -> [libc::c_char; MAX_WITH_NUL] {
401    let mut result = [0; MAX_WITH_NUL];
402    for (src, dst) in cstr.to_bytes().iter().zip(&mut result[..MAX_WITH_NUL - 1]) {
403        *dst = *src as libc::c_char;
404    }
405    result
406}
407
/// Returns the number of hardware threads usable by the current process,
/// honoring affinity masks and (on Linux/Android) cgroup CPU quotas where the
/// platform exposes them, falling back to simpler CPU counts elsewhere.
pub fn available_parallelism() -> io::Result<NonZero<usize>> {
    cfg_if::cfg_if! {
        if #[cfg(any(
            target_os = "android",
            target_os = "emscripten",
            target_os = "fuchsia",
            target_os = "hurd",
            target_os = "linux",
            target_os = "aix",
            target_vendor = "apple",
            target_os = "cygwin",
        ))] {
            // usize::MAX means "no quota"; only Linux/Android ever lower it.
            #[allow(unused_assignments)]
            #[allow(unused_mut)]
            let mut quota = usize::MAX;

            #[cfg(any(target_os = "android", target_os = "linux"))]
            {
                quota = cgroups::quota().max(1);
                let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
                unsafe {
                    if libc::sched_getaffinity(0, size_of::<libc::cpu_set_t>(), &mut set) == 0 {
                        let count = libc::CPU_COUNT(&set) as usize;
                        let count = count.min(quota);

                        // According to sched_getaffinity's API it should always be non-zero, but
                        // some old MIPS kernels were buggy and zero-initialized the mask if
                        // none was explicitly set.
                        // In that case we use the sysconf fallback.
                        if let Some(count) = NonZero::new(count) {
                            return Ok(count)
                        }
                    }
                }
            }
            match unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) } {
                -1 => Err(io::Error::last_os_error()),
                0 => Err(io::Error::UNKNOWN_THREAD_COUNT),
                cpus => {
                    let count = cpus as usize;
                    // Cover the unusual situation where we were able to get the quota but not the affinity mask
                    let count = count.min(quota);
                    // SAFETY: `cpus` matched neither -1 nor 0 above, so count >= 1.
                    Ok(unsafe { NonZero::new_unchecked(count) })
                }
            }
        } else if #[cfg(any(
                   target_os = "freebsd",
                   target_os = "dragonfly",
                   target_os = "openbsd",
                   target_os = "netbsd",
               ))] {
            use crate::ptr;

            // FreeBSD: prefer the process' CPU affinity set when available.
            #[cfg(target_os = "freebsd")]
            {
                let mut set: libc::cpuset_t = unsafe { mem::zeroed() };
                unsafe {
                    if libc::cpuset_getaffinity(
                        libc::CPU_LEVEL_WHICH,
                        libc::CPU_WHICH_PID,
                        -1,
                        size_of::<libc::cpuset_t>(),
                        &mut set,
                    ) == 0 {
                        let count = libc::CPU_COUNT(&set) as usize;
                        if count > 0 {
                            return Ok(NonZero::new_unchecked(count));
                        }
                    }
                }
            }

            // NetBSD: count the bits of the thread's affinity set, if any.
            #[cfg(target_os = "netbsd")]
            {
                unsafe {
                    let set = libc::_cpuset_create();
                    if !set.is_null() {
                        let mut count: usize = 0;
                        if libc::pthread_getaffinity_np(libc::pthread_self(), libc::_cpuset_size(set), set) == 0 {
                            for i in 0..libc::cpuid_t::MAX {
                                match libc::_cpuset_isset(i, set) {
                                    -1 => break,
                                    0 => continue,
                                    _ => count = count + 1,
                                }
                            }
                        }
                        libc::_cpuset_destroy(set);
                        if let Some(count) = NonZero::new(count) {
                            return Ok(count);
                        }
                    }
                }
            }

            let mut cpus: libc::c_uint = 0;
            let mut cpus_size = size_of_val(&cpus);

            unsafe {
                cpus = libc::sysconf(libc::_SC_NPROCESSORS_ONLN) as libc::c_uint;
            }

            // Fallback approach in case of errors or no hardware threads.
            if cpus < 1 {
                let mut mib = [libc::CTL_HW, libc::HW_NCPU, 0, 0];
                let res = unsafe {
                    libc::sysctl(
                        mib.as_mut_ptr(),
                        2,
                        (&raw mut cpus) as *mut _,
                        (&raw mut cpus_size) as *mut _,
                        ptr::null_mut(),
                        0,
                    )
                };

                // Handle errors if any.
                if res == -1 {
                    return Err(io::Error::last_os_error());
                } else if cpus == 0 {
                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
                }
            }

            // SAFETY: the checks above guarantee cpus >= 1 on every path here.
            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
        } else if #[cfg(target_os = "nto")] {
            unsafe {
                use libc::_syspage_ptr;
                if _syspage_ptr.is_null() {
                    Err(io::const_error!(io::ErrorKind::NotFound, "no syspage available"))
                } else {
                    let cpus = (*_syspage_ptr).num_cpu;
                    NonZero::new(cpus as usize)
                        .ok_or(io::Error::UNKNOWN_THREAD_COUNT)
                }
            }
        } else if #[cfg(any(target_os = "solaris", target_os = "illumos"))] {
            let mut cpus = 0u32;
            if unsafe { libc::pset_info(libc::PS_MYID, core::ptr::null_mut(), &mut cpus, core::ptr::null_mut()) } != 0 {
                return Err(io::Error::UNKNOWN_THREAD_COUNT);
            }
            // NOTE(review): assumes pset_info never reports 0 CPUs on success
            // — new_unchecked would be UB otherwise; verify against the API.
            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
        } else if #[cfg(target_os = "haiku")] {
            // system_info cpu_count field gets the static data set at boot time with `smp_set_num_cpus`
            // `get_system_info` calls then `smp_get_num_cpus`
            unsafe {
                let mut sinfo: libc::system_info = crate::mem::zeroed();
                let res = libc::get_system_info(&mut sinfo);

                if res != libc::B_OK {
                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
                }

                Ok(NonZero::new_unchecked(sinfo.cpu_count as usize))
            }
        } else if #[cfg(target_os = "vxworks")] {
            // Note: there is also `vxCpuConfiguredGet`, closer to _SC_NPROCESSORS_CONF
            // expectations than the actual cores availability.
            unsafe extern "C" {
                fn vxCpuEnabledGet() -> libc::cpuset_t;
            }

            // SAFETY: `vxCpuEnabledGet` always fetches a mask with at least one bit set
            unsafe{
                let set = vxCpuEnabledGet();
                Ok(NonZero::new_unchecked(set.count_ones() as usize))
            }
        } else {
            // FIXME: implement on Redox, l4re
            Err(io::const_error!(io::ErrorKind::Unsupported, "getting the number of hardware threads is not supported on the target platform"))
        }
    }
}
581
#[cfg(any(target_os = "android", target_os = "linux"))]
mod cgroups {
    //! Support for reading the cgroup (v1 and v2) CPU quota of the current
    //! process, used to cap `available_parallelism` in containers.
    //!
    //! Currently not covered
    //! * cgroup v2 in non-standard mountpoints
    //! * paths containing control characters or spaces, since those would be escaped in procfs
    //!   output and we don't unescape

    use crate::borrow::Cow;
    use crate::ffi::OsString;
    use crate::fs::{File, exists};
    use crate::io::{BufRead, Read};
    use crate::os::unix::ffi::OsStringExt;
    use crate::path::{Path, PathBuf};
    use crate::str::from_utf8;

    // Which cgroup hierarchy version the process' cpu controller lives in.
    #[derive(PartialEq)]
    enum Cgroup {
        V1,
        V2,
    }

    /// Returns cgroup CPU quota in core-equivalents, rounded down or usize::MAX if the quota cannot
    /// be determined or is not set.
    pub(super) fn quota() -> usize {
        let mut quota = usize::MAX;
        if cfg!(miri) {
            // Attempting to open a file fails under default flags due to isolation.
            // And Miri does not have parallelism anyway.
            return quota;
        }

        // Any failure along the way simply leaves `quota` at usize::MAX.
        let _: Option<()> = try {
            let mut buf = Vec::with_capacity(128);
            // find our place in the cgroup hierarchy
            File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
            let (cgroup_path, version) =
                buf.split(|&c| c == b'\n').fold(None, |previous, line| {
                    let mut fields = line.splitn(3, |&c| c == b':');
                    // 2nd field is a list of controllers for v1 or empty for v2
                    let version = match fields.nth(1) {
                        Some(b"") => Cgroup::V2,
                        Some(controllers)
                            if from_utf8(controllers)
                                .is_ok_and(|c| c.split(',').any(|c| c == "cpu")) =>
                        {
                            Cgroup::V1
                        }
                        _ => return previous,
                    };

                    // already-found v1 trumps v2 since it explicitly specifies its controllers
                    if previous.is_some() && version == Cgroup::V2 {
                        return previous;
                    }

                    let path = fields.last()?;
                    // skip leading slash
                    Some((path[1..].to_owned(), version))
                })?;
            let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));

            quota = match version {
                Cgroup::V1 => quota_v1(cgroup_path),
                Cgroup::V2 => quota_v2(cgroup_path),
            };
        };

        quota
    }

    // Reads the cgroup v2 `cpu.max` limits, walking up the hierarchy and
    // keeping the most restrictive limit/period ratio found.
    fn quota_v2(group_path: PathBuf) -> usize {
        let mut quota = usize::MAX;

        let mut path = PathBuf::with_capacity(128);
        let mut read_buf = String::with_capacity(20);

        // standard mount location defined in file-hierarchy(7) manpage
        let cgroup_mount = "/sys/fs/cgroup";

        path.push(cgroup_mount);
        path.push(&group_path);

        path.push("cgroup.controllers");

        // skip if we're not looking at cgroup2
        if matches!(exists(&path), Err(_) | Ok(false)) {
            return usize::MAX;
        };

        path.pop();

        let _: Option<()> = try {
            while path.starts_with(cgroup_mount) {
                path.push("cpu.max");

                read_buf.clear();

                if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
                    // cpu.max format: "<limit-or-max> <period>" on one line.
                    let raw_quota = read_buf.lines().next()?;
                    let mut raw_quota = raw_quota.split(' ');
                    let limit = raw_quota.next()?;
                    let period = raw_quota.next()?;
                    match (limit.parse::<usize>(), period.parse::<usize>()) {
                        (Ok(limit), Ok(period)) if period > 0 => {
                            quota = quota.min(limit / period);
                        }
                        _ => {}
                    }
                }

                path.pop(); // pop filename
                path.pop(); // pop dir
            }
        };

        quota
    }

    // Reads cgroup v1 `cpu.cfs_quota_us`/`cpu.cfs_period_us`, trying common
    // mountpoints first and scanning mountinfo as a last resort.
    fn quota_v1(group_path: PathBuf) -> usize {
        let mut quota = usize::MAX;
        let mut path = PathBuf::with_capacity(128);
        let mut read_buf = String::with_capacity(20);

        // Hardcode commonly used locations mentioned in the cgroups(7) manpage
        // if that doesn't work scan mountinfo and adjust `group_path` for bind-mounts
        let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[
            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)),
            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)),
            // this can be expensive on systems with tons of mountpoints
            // but we only get to this point when /proc/self/cgroups explicitly indicated
            // this process belongs to a cpu-controller cgroup v1 and the defaults didn't work
            find_mountpoint,
        ];

        for mount in mounts {
            let Some((mount, group_path)) = mount(&group_path) else { continue };

            path.clear();
            path.push(mount.as_ref());
            path.push(&group_path);

            // skip if we guessed the mount incorrectly
            if matches!(exists(&path), Err(_) | Ok(false)) {
                continue;
            }

            // Walk up the hierarchy, keeping the most restrictive ratio.
            while path.starts_with(mount.as_ref()) {
                let mut parse_file = |name| {
                    path.push(name);
                    read_buf.clear();

                    let f = File::open(&path);
                    path.pop(); // restore buffer before any early returns
                    f.ok()?.read_to_string(&mut read_buf).ok()?;
                    let parsed = read_buf.trim().parse::<usize>().ok()?;

                    Some(parsed)
                };

                let limit = parse_file("cpu.cfs_quota_us");
                let period = parse_file("cpu.cfs_period_us");

                match (limit, period) {
                    (Some(limit), Some(period)) if period > 0 => quota = quota.min(limit / period),
                    _ => {}
                }

                path.pop();
            }

            // we passed the try_exists above so we should have traversed the correct hierarchy
            // when reaching this line
            break;
        }

        quota
    }

    /// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
    ///
    /// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
    /// over the already-included prefix
    fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
        let mut reader = File::open_buffered("/proc/self/mountinfo").ok()?;
        let mut line = String::with_capacity(256);
        loop {
            line.clear();
            if reader.read_line(&mut line).ok()? == 0 {
                break;
            }

            let line = line.trim();
            let mut items = line.split(' ');

            // mountinfo fields (see proc(5)): the 4th is the root of the
            // mount within the filesystem, the 5th the mount point; from the
            // back, the last is super options and 2 back is the fs type.
            let sub_path = items.nth(3)?;
            let mount_point = items.next()?;
            let mount_opts = items.next_back()?;
            let filesystem_type = items.nth_back(1)?;

            if filesystem_type != "cgroup" || !mount_opts.split(',').any(|opt| opt == "cpu") {
                // not a cgroup / not a cpu-controller
                continue;
            }

            let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;

            if !group_path.starts_with(sub_path) {
                // this is a bind-mount and the bound subdirectory
                // does not contain the cgroup this process belongs to
                continue;
            }

            let trimmed_group_path = group_path.strip_prefix(sub_path).ok()?;

            return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
        }

        None
    }
}
802
// glibc >= 2.15 has a __pthread_get_minstack() function that returns
// PTHREAD_STACK_MIN plus bytes needed for thread-local storage.
// We need that information to avoid blowing up when a small stack
// is created in an application with big thread-local storage requirements.
// See #6233 for rationale and details.
#[cfg(all(target_os = "linux", target_env = "gnu"))]
unsafe fn min_stack_size(attr: *const libc::pthread_attr_t) -> usize {
    // We use dlsym to avoid an ELF version dependency on GLIBC_PRIVATE. (#23628)
    // We shouldn't really be using such an internal symbol, but there's currently
    // no other way to account for the TLS size.
    dlsym!(
        fn __pthread_get_minstack(attr: *const libc::pthread_attr_t) -> libc::size_t;
    );

    // If the private symbol can't be resolved, fall back to the portable
    // minimum; spawning may then fail for huge TLS requirements, but it
    // remains sound.
    match __pthread_get_minstack.get() {
        None => libc::PTHREAD_STACK_MIN,
        Some(f) => unsafe { f(attr) },
    }
}
822
// No point in looking up __pthread_get_minstack() on non-glibc platforms.
#[cfg(all(
    not(all(target_os = "linux", target_env = "gnu")),
    not(any(target_os = "netbsd", target_os = "nuttx"))
))]
unsafe fn min_stack_size(_: *const libc::pthread_attr_t) -> usize {
    // The libc crate exposes the platform's compile-time constant directly.
    libc::PTHREAD_STACK_MIN
}
831
// On NetBSD and NuttX the minimum thread stack size is only available at
// runtime via sysconf; cache the answer so the syscall happens once.
#[cfg(any(target_os = "netbsd", target_os = "nuttx"))]
unsafe fn min_stack_size(_: *const libc::pthread_attr_t) -> usize {
    static STACK: crate::sync::OnceLock<usize> = crate::sync::OnceLock::new();

    *STACK.get_or_init(|| {
        let raw = unsafe { libc::sysconf(libc::_SC_THREAD_STACK_MIN) };
        // A negative result means the value is unavailable; just a guess then.
        if raw < 0 { 2048 } else { raw as usize }
    })
}