std/sys/thread/
unix.rs

1#[cfg(not(any(
2    target_env = "newlib",
3    target_os = "l4re",
4    target_os = "emscripten",
5    target_os = "redox",
6    target_os = "hurd",
7    target_os = "aix",
8)))]
9use crate::ffi::CStr;
10use crate::mem::{self, ManuallyDrop};
11use crate::num::NonZero;
12#[cfg(all(target_os = "linux", target_env = "gnu"))]
13use crate::sys::weak::dlsym;
14#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))]
15use crate::sys::weak::weak;
16use crate::sys::{os, stack_overflow};
17use crate::time::Duration;
18use crate::{cmp, io, ptr};
// Per-platform default for the minimum thread stack size, in bytes. The `cfg`
// predicates below are mutually exclusive, so exactly one definition is
// compiled for any given target.

/// 2 MiB on every target that has no dedicated value below.
#[cfg(not(any(
    target_os = "l4re",
    target_os = "vxworks",
    target_os = "espidf",
    target_os = "nuttx"
)))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 2 * 1024 * 1024;
/// 1 MiB on L4Re.
#[cfg(target_os = "l4re")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024;
/// 256 KiB on VxWorks.
#[cfg(target_os = "vxworks")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024;
/// 0 on ESP-IDF and NuttX, a sentinel meaning "do not override the stack size"
/// (see `Thread::new`, which only sets a stack size there when it is non-zero).
#[cfg(any(target_os = "espidf", target_os = "nuttx"))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF/NuttX menuconfig system should be used
32
/// Heap-allocated payload handed to a newly created thread through the
/// `void *` argument of `pthread_create`.
struct ThreadData {
    /// Optional thread name; on the new thread it is passed to the stack
    /// overflow handler.
    name: Option<Box<str>>,
    /// The closure the new thread runs.
    f: Box<dyn FnOnce()>,
}
37
/// Owning handle to a native pthread.
///
/// Dropping the handle detaches the thread; `join` and `into_id` consume the
/// handle without detaching.
pub struct Thread {
    /// The native thread identifier filled in by `pthread_create`.
    id: libc::pthread_t,
}

// Some platforms may have pthread_t as a pointer in which case we still want
// a thread to be Send/Sync
unsafe impl Send for Thread {}
unsafe impl Sync for Thread {}
46
impl Thread {
    /// Creates a new native thread that runs `f`.
    ///
    /// `stack` is the requested stack size in bytes; it is raised to at least
    /// `min_stack_size` before being applied. On ESP-IDF and NuttX a `stack`
    /// of 0 leaves the attribute's stack size untouched. `name`, if any, is
    /// copied to the heap and handed to the new thread's stack overflow
    /// handler.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` if the stack size is still rejected after being
    /// rounded up to a page boundary, or the raw OS error reported by
    /// `pthread_create`.
    ///
    /// # Panics
    ///
    /// Panics if initializing or destroying the pthread attribute object
    /// fails, or if `pthread_attr_setstacksize` fails with anything other
    /// than `EINVAL`.
    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
    pub unsafe fn new(
        stack: usize,
        name: Option<&str>,
        f: Box<dyn FnOnce()>,
    ) -> io::Result<Thread> {
        // Ownership of the payload is transferred to the new thread as a raw
        // pointer; every failure path below must reclaim it.
        let data = Box::into_raw(Box::new(ThreadData { name: name.map(Box::from), f }));
        let mut native: libc::pthread_t = mem::zeroed();
        let mut attr: mem::MaybeUninit<libc::pthread_attr_t> = mem::MaybeUninit::uninit();
        assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);

        #[cfg(any(target_os = "espidf", target_os = "nuttx"))]
        if stack > 0 {
            // Only set the stack if a non-zero value is passed
            // 0 is used as an indication that the default stack size configured in the ESP-IDF/NuttX menuconfig system should be used
            assert_eq!(
                libc::pthread_attr_setstacksize(
                    attr.as_mut_ptr(),
                    cmp::max(stack, min_stack_size(attr.as_ptr()))
                ),
                0
            );
        }

        #[cfg(not(any(target_os = "espidf", target_os = "nuttx")))]
        {
            let stack_size = cmp::max(stack, min_stack_size(attr.as_ptr()));

            match libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) {
                0 => {}
                n => {
                    assert_eq!(n, libc::EINVAL);
                    // EINVAL means |stack_size| is either too small or not a
                    // multiple of the system page size. Because it's definitely
                    // >= PTHREAD_STACK_MIN, it must be an alignment issue.
                    // Round up to the nearest page and try again.
                    let page_size = os::page_size();
                    // The mask below equals `!(page_size - 1)`, i.e. it clears
                    // the low bits (page_size is assumed to be a power of two).
                    let stack_size =
                        (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1);

                    // Some libc implementations, e.g. musl, place an upper bound
                    // on the stack size, in which case we can only gracefully return
                    // an error here.
                    if libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) != 0 {
                        assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);
                        // The thread was never created, so the payload is still ours to free.
                        drop(Box::from_raw(data));
                        return Err(io::const_error!(
                            io::ErrorKind::InvalidInput,
                            "invalid stack size"
                        ));
                    }
                }
            };
        }

        let ret = libc::pthread_create(&mut native, attr.as_ptr(), thread_start, data as *mut _);
        // Note: if the thread creation fails and this assert fails, then `data` will
        // be leaked. However, an alternative design could cause double-free
        // which is clearly worse.
        assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);

        return if ret != 0 {
            // The thread failed to start and as a result `data` was not consumed. Therefore, it is
            // safe to reconstruct the box so that it gets deallocated.
            drop(Box::from_raw(data));
            Err(io::Error::from_raw_os_error(ret))
        } else {
            Ok(Thread { id: native })
        };

        // Entry point executed on the new thread. `data` is the pointer
        // produced by `Box::into_raw` above; ownership is taken back here.
        extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void {
            unsafe {
                let data = Box::from_raw(data as *mut ThreadData);
                // Next, set up our stack overflow handler which may get triggered if we run
                // out of stack.
                let _handler = stack_overflow::Handler::new(data.name);
                // Finally, let's run some code.
                (data.f)();
            }
            ptr::null_mut()
        }
    }

    /// Waits for the thread to finish via `pthread_join`.
    ///
    /// Consumes the handle without running `Drop`, so the thread is joined
    /// rather than detached.
    ///
    /// # Panics
    ///
    /// Panics if `pthread_join` returns a non-zero status.
    pub fn join(self) {
        let id = self.into_id();
        let ret = unsafe { libc::pthread_join(id, ptr::null_mut()) };
        assert!(ret == 0, "failed to join thread: {}", io::Error::from_raw_os_error(ret));
    }

    /// Returns the native thread identifier without giving up ownership.
    pub fn id(&self) -> libc::pthread_t {
        self.id
    }

    /// Consumes the handle and returns the native thread identifier.
    ///
    /// The handle is wrapped in `ManuallyDrop`, so `Drop` does not run and the
    /// thread is not detached.
    pub fn into_id(self) -> libc::pthread_t {
        ManuallyDrop::new(self).id
    }
}
146
147impl Drop for Thread {
148    fn drop(&mut self) {
149        let ret = unsafe { libc::pthread_detach(self.id) };
150        debug_assert_eq!(ret, 0);
151    }
152}
153
154pub fn available_parallelism() -> io::Result<NonZero<usize>> {
155    cfg_select! {
156        any(
157            target_os = "android",
158            target_os = "emscripten",
159            target_os = "fuchsia",
160            target_os = "hurd",
161            target_os = "linux",
162            target_os = "aix",
163            target_vendor = "apple",
164            target_os = "cygwin",
165        ) => {
166            #[allow(unused_assignments)]
167            #[allow(unused_mut)]
168            let mut quota = usize::MAX;
169
170            #[cfg(any(target_os = "android", target_os = "linux"))]
171            {
172                quota = cgroups::quota().max(1);
173                let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
174                unsafe {
175                    if libc::sched_getaffinity(0, size_of::<libc::cpu_set_t>(), &mut set) == 0 {
176                        let count = libc::CPU_COUNT(&set) as usize;
177                        let count = count.min(quota);
178
179                        // According to sched_getaffinity's API it should always be non-zero, but
180                        // some old MIPS kernels were buggy and zero-initialized the mask if
181                        // none was explicitly set.
182                        // In that case we use the sysconf fallback.
183                        if let Some(count) = NonZero::new(count) {
184                            return Ok(count)
185                        }
186                    }
187                }
188            }
189            match unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) } {
190                -1 => Err(io::Error::last_os_error()),
191                0 => Err(io::Error::UNKNOWN_THREAD_COUNT),
192                cpus => {
193                    let count = cpus as usize;
194                    // Cover the unusual situation where we were able to get the quota but not the affinity mask
195                    let count = count.min(quota);
196                    Ok(unsafe { NonZero::new_unchecked(count) })
197                }
198            }
199        }
200        any(
201           target_os = "freebsd",
202           target_os = "dragonfly",
203           target_os = "openbsd",
204           target_os = "netbsd",
205        ) => {
206            use crate::ptr;
207
208            #[cfg(target_os = "freebsd")]
209            {
210                let mut set: libc::cpuset_t = unsafe { mem::zeroed() };
211                unsafe {
212                    if libc::cpuset_getaffinity(
213                        libc::CPU_LEVEL_WHICH,
214                        libc::CPU_WHICH_PID,
215                        -1,
216                        size_of::<libc::cpuset_t>(),
217                        &mut set,
218                    ) == 0 {
219                        let count = libc::CPU_COUNT(&set) as usize;
220                        if count > 0 {
221                            return Ok(NonZero::new_unchecked(count));
222                        }
223                    }
224                }
225            }
226
227            #[cfg(target_os = "netbsd")]
228            {
229                unsafe {
230                    let set = libc::_cpuset_create();
231                    if !set.is_null() {
232                        let mut count: usize = 0;
233                        if libc::pthread_getaffinity_np(libc::pthread_self(), libc::_cpuset_size(set), set) == 0 {
234                            for i in 0..libc::cpuid_t::MAX {
235                                match libc::_cpuset_isset(i, set) {
236                                    -1 => break,
237                                    0 => continue,
238                                    _ => count = count + 1,
239                                }
240                            }
241                        }
242                        libc::_cpuset_destroy(set);
243                        if let Some(count) = NonZero::new(count) {
244                            return Ok(count);
245                        }
246                    }
247                }
248            }
249
250            let mut cpus: libc::c_uint = 0;
251            let mut cpus_size = size_of_val(&cpus);
252
253            unsafe {
254                cpus = libc::sysconf(libc::_SC_NPROCESSORS_ONLN) as libc::c_uint;
255            }
256
257            // Fallback approach in case of errors or no hardware threads.
258            if cpus < 1 {
259                let mut mib = [libc::CTL_HW, libc::HW_NCPU, 0, 0];
260                let res = unsafe {
261                    libc::sysctl(
262                        mib.as_mut_ptr(),
263                        2,
264                        (&raw mut cpus) as *mut _,
265                        (&raw mut cpus_size) as *mut _,
266                        ptr::null_mut(),
267                        0,
268                    )
269                };
270
271                // Handle errors if any.
272                if res == -1 {
273                    return Err(io::Error::last_os_error());
274                } else if cpus == 0 {
275                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
276                }
277            }
278
279            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
280        }
281        target_os = "nto" => {
282            unsafe {
283                use libc::_syspage_ptr;
284                if _syspage_ptr.is_null() {
285                    Err(io::const_error!(io::ErrorKind::NotFound, "no syspage available"))
286                } else {
287                    let cpus = (*_syspage_ptr).num_cpu;
288                    NonZero::new(cpus as usize)
289                        .ok_or(io::Error::UNKNOWN_THREAD_COUNT)
290                }
291            }
292        }
293        any(target_os = "solaris", target_os = "illumos") => {
294            let mut cpus = 0u32;
295            if unsafe { libc::pset_info(libc::PS_MYID, core::ptr::null_mut(), &mut cpus, core::ptr::null_mut()) } != 0 {
296                return Err(io::Error::UNKNOWN_THREAD_COUNT);
297            }
298            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
299        }
300        target_os = "haiku" => {
301            // system_info cpu_count field gets the static data set at boot time with `smp_set_num_cpus`
302            // `get_system_info` calls then `smp_get_num_cpus`
303            unsafe {
304                let mut sinfo: libc::system_info = crate::mem::zeroed();
305                let res = libc::get_system_info(&mut sinfo);
306
307                if res != libc::B_OK {
308                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
309                }
310
311                Ok(NonZero::new_unchecked(sinfo.cpu_count as usize))
312            }
313        }
314        target_os = "vxworks" => {
315            // Note: there is also `vxCpuConfiguredGet`, closer to _SC_NPROCESSORS_CONF
316            // expectations than the actual cores availability.
317
318            // SAFETY: `vxCpuEnabledGet` always fetches a mask with at least one bit set
319            unsafe{
320                let set = libc::vxCpuEnabledGet();
321                Ok(NonZero::new_unchecked(set.count_ones() as usize))
322            }
323        }
324        _ => {
325            // FIXME: implement on Redox, l4re
326            Err(io::const_error!(io::ErrorKind::Unsupported, "getting the number of hardware threads is not supported on the target platform"))
327        }
328    }
329}
330
331pub fn current_os_id() -> Option<u64> {
332    // Most Unix platforms have a way to query an integer ID of the current thread, all with
333    // slightly different spellings.
334    //
335    // The OS thread ID is used rather than `pthread_self` so as to match what will be displayed
336    // for process inspection (debuggers, trace, `top`, etc.).
337    cfg_select! {
338        // Most platforms have a function returning a `pid_t` or int, which is an `i32`.
339        any(target_os = "android", target_os = "linux") => {
340            use crate::sys::pal::weak::syscall;
341
342            // `libc::gettid` is only available on glibc 2.30+, but the syscall is available
343            // since Linux 2.4.11.
344            syscall!(fn gettid() -> libc::pid_t;);
345
346            // SAFETY: FFI call with no preconditions.
347            let id: libc::pid_t = unsafe { gettid() };
348            Some(id as u64)
349        }
350        target_os = "nto" => {
351            // SAFETY: FFI call with no preconditions.
352            let id: libc::pid_t = unsafe { libc::gettid() };
353            Some(id as u64)
354        }
355        target_os = "openbsd" => {
356            // SAFETY: FFI call with no preconditions.
357            let id: libc::pid_t = unsafe { libc::getthrid() };
358            Some(id as u64)
359        }
360        target_os = "freebsd" => {
361            // SAFETY: FFI call with no preconditions.
362            let id: libc::c_int = unsafe { libc::pthread_getthreadid_np() };
363            Some(id as u64)
364        }
365        target_os = "netbsd" => {
366            // SAFETY: FFI call with no preconditions.
367            let id: libc::lwpid_t = unsafe { libc::_lwp_self() };
368            Some(id as u64)
369        }
370        any(target_os = "illumos", target_os = "solaris") => {
371            // On Illumos and Solaris, the `pthread_t` is the same as the OS thread ID.
372            // SAFETY: FFI call with no preconditions.
373            let id: libc::pthread_t = unsafe { libc::pthread_self() };
374            Some(id as u64)
375        }
376        target_vendor = "apple" => {
377            // Apple allows querying arbitrary thread IDs, `thread=NULL` queries the current thread.
378            let mut id = 0u64;
379            // SAFETY: `thread_id` is a valid pointer, no other preconditions.
380            let status: libc::c_int = unsafe { libc::pthread_threadid_np(0, &mut id) };
381            if status == 0 {
382                Some(id)
383            } else {
384                None
385            }
386        }
387        // Other platforms don't have an OS thread ID or don't have a way to access it.
388        _ => None,
389    }
390}
391
/// Copies `cstr` into a fixed-size, zero-filled buffer of `MAX_WITH_NUL`
/// characters, dropping any bytes that do not fit.
///
/// The last element is never written, so the result always ends in a NUL.
#[cfg(any(
    target_os = "linux",
    target_os = "nto",
    target_os = "solaris",
    target_os = "illumos",
    target_os = "vxworks",
    target_os = "cygwin",
    target_vendor = "apple",
))]
fn truncate_cstr<const MAX_WITH_NUL: usize>(cstr: &CStr) -> [libc::c_char; MAX_WITH_NUL] {
    let mut buf = [0; MAX_WITH_NUL];
    // Everything except the final slot may receive payload bytes; the final
    // slot stays zero and acts as the terminator.
    let (payload, _terminator) = buf.split_at_mut(MAX_WITH_NUL - 1);
    for (slot, &byte) in payload.iter_mut().zip(cstr.to_bytes()) {
        *slot = byte as libc::c_char;
    }
    buf
}
408
/// Names the calling thread on Android through `prctl(PR_SET_NAME, ...)`.
#[cfg(target_os = "android")]
pub fn set_name(name: &CStr) {
    const PR_SET_NAME: libc::c_int = 15;
    // The three trailing `prctl` arguments are passed as zero.
    let zero = 0 as libc::c_ulong;
    let status = unsafe { libc::prctl(PR_SET_NAME, name.as_ptr(), zero, zero, zero) };
    // Errors cannot be reported to the caller from here, so success is only verified in debug builds.
    debug_assert_eq!(status, 0);
}
424
/// Sets the name of the calling thread with `pthread_setname_np`.
///
/// On Linux and Cygwin the name is first truncated to `TASK_COMM_LEN`
/// (16 characters including the NUL terminator); on the other targets covered
/// here it is passed through unchanged. Failure is only checked in debug builds.
#[cfg(any(
    target_os = "linux",
    target_os = "freebsd",
    target_os = "dragonfly",
    target_os = "nuttx",
    target_os = "cygwin"
))]
pub fn set_name(name: &CStr) {
    unsafe {
        // NOTE(review): the `let name` in the first arm only has an effect if the
        // macro expands the arm's statements into this enclosing block, so that the
        // truncated buffer shadows the parameter for the call below — confirm
        // against the `cfg_select!` definition before restructuring.
        cfg_select! {
            any(target_os = "linux", target_os = "cygwin") => {
                // Linux and Cygwin limits the allowed length of the name.
                const TASK_COMM_LEN: usize = 16;
                let name = truncate_cstr::<{ TASK_COMM_LEN }>(name);
            }
            _ => {
                // FreeBSD, DragonFly BSD and NuttX do not enforce length limits.
            }
        };
        // Available since glibc 2.12, musl 1.1.16, and uClibc 1.0.20 for Linux,
        // FreeBSD 12.2 and 13.0, and DragonFly BSD 6.0.
        let res = libc::pthread_setname_np(libc::pthread_self(), name.as_ptr());
        // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
        debug_assert_eq!(res, 0);
    }
}
451
/// Names the calling thread on OpenBSD via `pthread_set_name_np`.
///
/// No status is checked.
#[cfg(target_os = "openbsd")]
pub fn set_name(name: &CStr) {
    unsafe {
        let current = libc::pthread_self();
        libc::pthread_set_name_np(current, name.as_ptr());
    }
}
458
/// Names the calling thread on Apple platforms.
///
/// The name is truncated to `MAXTHREADNAMESIZE` characters (terminator
/// included) before it is handed to `pthread_setname_np`.
#[cfg(target_vendor = "apple")]
pub fn set_name(name: &CStr) {
    let truncated = truncate_cstr::<{ libc::MAXTHREADNAMESIZE }>(name);
    let status = unsafe { libc::pthread_setname_np(truncated.as_ptr()) };
    // Errors cannot be reported to the caller from here, so success is only verified in debug builds.
    debug_assert_eq!(status, 0);
}
468
/// Names the calling thread on NetBSD.
///
/// NetBSD's `pthread_setname_np` takes a format string plus one argument, so
/// the name is supplied as the argument to a literal `%s` format.
#[cfg(target_os = "netbsd")]
pub fn set_name(name: &CStr) {
    let format = c"%s";
    let argument = name.as_ptr() as *mut libc::c_void;
    let status =
        unsafe { libc::pthread_setname_np(libc::pthread_self(), format.as_ptr(), argument) };
    debug_assert_eq!(status, 0);
}
480
/// Names the calling thread on Solaris, illumos and QNX Neutrino.
///
/// `pthread_setname_np` is looked up as a weak symbol; when it is absent the
/// call is silently skipped. The name is truncated to the platform limit first.
#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto"))]
pub fn set_name(name: &CStr) {
    weak!(
        fn pthread_setname_np(thread: libc::pthread_t, name: *const libc::c_char) -> libc::c_int;
    );

    #[cfg(target_os = "nto")]
    const THREAD_NAME_MAX: usize = libc::_NTO_THREAD_NAME_MAX as usize;
    #[cfg(any(target_os = "solaris", target_os = "illumos"))]
    const THREAD_NAME_MAX: usize = 32;

    // Nothing to do when the symbol is not provided by the platform.
    let Some(setname) = pthread_setname_np.get() else {
        return;
    };

    let truncated = truncate_cstr::<{ THREAD_NAME_MAX }>(name);
    let status = unsafe { setname(libc::pthread_self(), truncated.as_ptr()) };
    debug_assert_eq!(status, 0);
}
498
/// Names the calling thread on Fuchsia by setting the `ZX_PROP_NAME` property
/// on the current thread object.
///
/// The length passed excludes the NUL terminator; the result is not checked.
#[cfg(target_os = "fuchsia")]
pub fn set_name(name: &CStr) {
    use crate::sys::pal::fuchsia::*;

    let name_ptr = name.as_ptr() as *const libc::c_void;
    let name_len = name.to_bytes().len();
    unsafe {
        zx_object_set_property(zx_thread_self(), ZX_PROP_NAME, name_ptr, name_len);
    }
}
511
/// Names the calling thread on Haiku with `rename_thread`.
///
/// The current thread is looked up by passing a null name to `find_thread`.
#[cfg(target_os = "haiku")]
pub fn set_name(name: &CStr) {
    let status = unsafe {
        let current = libc::find_thread(ptr::null_mut());
        libc::rename_thread(current, name.as_ptr())
    };
    // Errors cannot be reported to the caller from here, so success is only verified in debug builds.
    debug_assert_eq!(status, libc::B_OK);
}
521
/// Names the calling task on VxWorks with `taskNameSet`.
///
/// The name is truncated to `VX_TASK_RENAME_LENGTH - 1` characters,
/// terminator included.
#[cfg(target_os = "vxworks")]
pub fn set_name(name: &CStr) {
    const NAME_BUF_LEN: usize = (libc::VX_TASK_RENAME_LENGTH - 1) as usize;

    let mut truncated = truncate_cstr::<{ NAME_BUF_LEN }>(name);
    let status = unsafe { libc::taskNameSet(libc::taskIdSelf(), truncated.as_mut_ptr()) };
    debug_assert_eq!(status, libc::OK);
}
528
529#[cfg(not(target_os = "espidf"))]
530pub fn sleep(dur: Duration) {
531    let mut secs = dur.as_secs();
532    let mut nsecs = dur.subsec_nanos() as _;
533
534    // If we're awoken with a signal then the return value will be -1 and
535    // nanosleep will fill in `ts` with the remaining time.
536    unsafe {
537        while secs > 0 || nsecs > 0 {
538            let mut ts = libc::timespec {
539                tv_sec: cmp::min(libc::time_t::MAX as u64, secs) as libc::time_t,
540                tv_nsec: nsecs,
541            };
542            secs -= ts.tv_sec as u64;
543            let ts_ptr = &raw mut ts;
544            if libc::nanosleep(ts_ptr, ts_ptr) == -1 {
545                assert_eq!(os::errno(), libc::EINTR);
546                secs += ts.tv_sec as u64;
547                nsecs = ts.tv_nsec;
548            } else {
549                nsecs = 0;
550            }
551        }
552    }
553}
554
/// Blocks the calling thread for at least `dur` on ESP-IDF, using `usleep`.
///
/// The duration is rounded up to whole microseconds and slept in chunks of at
/// most `MAX_MICROS`.
#[cfg(target_os = "espidf")]
pub fn sleep(dur: Duration) {
    // `nanosleep` is unavailable on ESP-IDF, hence `usleep`.
    // Per its documentation, `usleep` only has to support sleeps of up to
    // about 1 second.
    //
    // ESP-IDF itself handles values close to `u32::MAX`, but its `usleep`
    // implementation has a potential integer overflow
    // (https://github.com/espressif/esp-idf/blob/d7ca8b94c852052e3bc33292287ef4dd62c9eeb1/components/newlib/time.c#L210),
    // so each call is capped at the largest value that cannot trigger it
    // (`portTICK_PERIOD_MS` ranges from 1 to 1000, with 10 as the default).
    const MAX_MICROS: u32 = u32::MAX - 1_000_000 - 1;

    // A sub-microsecond remainder is rounded up to one extra microsecond, because
    // the `std::thread::sleep` contract requires sleeping for _at least_ `dur`.
    // The sum cannot overflow: it is a `u128`, whereas `Duration` holds only
    // (`u64` secs, `u32` nanos) with nanos strictly below 1_000_000_000.
    let has_partial_micro = dur.subsec_nanos() % 1_000 > 0;
    let mut remaining = dur.as_micros() + u128::from(has_partial_micro);

    while remaining > 0 {
        let chunk = remaining.min(u128::from(MAX_MICROS)) as u32;
        unsafe {
            libc::usleep(chunk);
        }
        remaining -= u128::from(chunk);
    }
}
585
586// Any unix that has clock_nanosleep
587// If this list changes update the MIRI chock_nanosleep shim
588#[cfg(any(
589    target_os = "freebsd",
590    target_os = "netbsd",
591    target_os = "linux",
592    target_os = "android",
593    target_os = "solaris",
594    target_os = "illumos",
595    target_os = "dragonfly",
596    target_os = "hurd",
597    target_os = "fuchsia",
598    target_os = "vxworks",
599))]
600pub fn sleep_until(deadline: crate::time::Instant) {
601    use crate::time::Instant;
602
603    let Some(ts) = deadline.into_inner().into_timespec().to_timespec() else {
604        // The deadline is further in the future then can be passed to
605        // clock_nanosleep. We have to use Self::sleep instead. This might
606        // happen on 32 bit platforms, especially closer to 2038.
607        let now = Instant::now();
608        if let Some(delay) = deadline.checked_duration_since(now) {
609            sleep(delay);
610        }
611        return;
612    };
613
614    unsafe {
615        // When we get interrupted (res = EINTR) call clock_nanosleep again
616        loop {
617            let res = libc::clock_nanosleep(
618                crate::sys::time::Instant::CLOCK_ID,
619                libc::TIMER_ABSTIME,
620                &ts,
621                core::ptr::null_mut(), // not required with TIMER_ABSTIME
622            );
623
624            if res == 0 {
625                break;
626            } else {
627                assert_eq!(
628                    res,
629                    libc::EINTR,
630                    "timespec is in range,
631                         clockid is valid and kernel should support it"
632                );
633            }
634        }
635    }
636}
637
638pub fn yield_now() {
639    let ret = unsafe { libc::sched_yield() };
640    debug_assert_eq!(ret, 0);
641}
642
643#[cfg(any(target_os = "android", target_os = "linux"))]
644mod cgroups {
645    //! Currently not covered
646    //! * cgroup v2 in non-standard mountpoints
647    //! * paths containing control characters or spaces, since those would be escaped in procfs
648    //!   output and we don't unescape
649
650    use crate::borrow::Cow;
651    use crate::ffi::OsString;
652    use crate::fs::{File, exists};
653    use crate::io::{BufRead, Read};
654    use crate::os::unix::ffi::OsStringExt;
655    use crate::path::{Path, PathBuf};
656    use crate::str::from_utf8;
657
658    #[derive(PartialEq)]
659    enum Cgroup {
660        V1,
661        V2,
662    }
663
664    /// Returns cgroup CPU quota in core-equivalents, rounded down or usize::MAX if the quota cannot
665    /// be determined or is not set.
666    pub(super) fn quota() -> usize {
667        let mut quota = usize::MAX;
668        if cfg!(miri) {
669            // Attempting to open a file fails under default flags due to isolation.
670            // And Miri does not have parallelism anyway.
671            return quota;
672        }
673
674        let _: Option<()> = try {
675            let mut buf = Vec::with_capacity(128);
676            // find our place in the cgroup hierarchy
677            File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
678            let (cgroup_path, version) =
679                buf.split(|&c| c == b'\n').fold(None, |previous, line| {
680                    let mut fields = line.splitn(3, |&c| c == b':');
681                    // 2nd field is a list of controllers for v1 or empty for v2
682                    let version = match fields.nth(1) {
683                        Some(b"") => Cgroup::V2,
684                        Some(controllers)
685                            if from_utf8(controllers)
686                                .is_ok_and(|c| c.split(',').any(|c| c == "cpu")) =>
687                        {
688                            Cgroup::V1
689                        }
690                        _ => return previous,
691                    };
692
693                    // already-found v1 trumps v2 since it explicitly specifies its controllers
694                    if previous.is_some() && version == Cgroup::V2 {
695                        return previous;
696                    }
697
698                    let path = fields.last()?;
699                    // skip leading slash
700                    Some((path[1..].to_owned(), version))
701                })?;
702            let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));
703
704            quota = match version {
705                Cgroup::V1 => quota_v1(cgroup_path),
706                Cgroup::V2 => quota_v2(cgroup_path),
707            };
708        };
709
710        quota
711    }
712
713    fn quota_v2(group_path: PathBuf) -> usize {
714        let mut quota = usize::MAX;
715
716        let mut path = PathBuf::with_capacity(128);
717        let mut read_buf = String::with_capacity(20);
718
719        // standard mount location defined in file-hierarchy(7) manpage
720        let cgroup_mount = "/sys/fs/cgroup";
721
722        path.push(cgroup_mount);
723        path.push(&group_path);
724
725        path.push("cgroup.controllers");
726
727        // skip if we're not looking at cgroup2
728        if matches!(exists(&path), Err(_) | Ok(false)) {
729            return usize::MAX;
730        };
731
732        path.pop();
733
734        let _: Option<()> = try {
735            while path.starts_with(cgroup_mount) {
736                path.push("cpu.max");
737
738                read_buf.clear();
739
740                if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
741                    let raw_quota = read_buf.lines().next()?;
742                    let mut raw_quota = raw_quota.split(' ');
743                    let limit = raw_quota.next()?;
744                    let period = raw_quota.next()?;
745                    match (limit.parse::<usize>(), period.parse::<usize>()) {
746                        (Ok(limit), Ok(period)) if period > 0 => {
747                            quota = quota.min(limit / period);
748                        }
749                        _ => {}
750                    }
751                }
752
753                path.pop(); // pop filename
754                path.pop(); // pop dir
755            }
756        };
757
758        quota
759    }
760
761    fn quota_v1(group_path: PathBuf) -> usize {
762        let mut quota = usize::MAX;
763        let mut path = PathBuf::with_capacity(128);
764        let mut read_buf = String::with_capacity(20);
765
766        // Hardcode commonly used locations mentioned in the cgroups(7) manpage
767        // if that doesn't work scan mountinfo and adjust `group_path` for bind-mounts
768        let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[
769            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)),
770            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)),
771            // this can be expensive on systems with tons of mountpoints
772            // but we only get to this point when /proc/self/cgroups explicitly indicated
773            // this process belongs to a cpu-controller cgroup v1 and the defaults didn't work
774            find_mountpoint,
775        ];
776
777        for mount in mounts {
778            let Some((mount, group_path)) = mount(&group_path) else { continue };
779
780            path.clear();
781            path.push(mount.as_ref());
782            path.push(&group_path);
783
784            // skip if we guessed the mount incorrectly
785            if matches!(exists(&path), Err(_) | Ok(false)) {
786                continue;
787            }
788
789            while path.starts_with(mount.as_ref()) {
790                let mut parse_file = |name| {
791                    path.push(name);
792                    read_buf.clear();
793
794                    let f = File::open(&path);
795                    path.pop(); // restore buffer before any early returns
796                    f.ok()?.read_to_string(&mut read_buf).ok()?;
797                    let parsed = read_buf.trim().parse::<usize>().ok()?;
798
799                    Some(parsed)
800                };
801
802                let limit = parse_file("cpu.cfs_quota_us");
803                let period = parse_file("cpu.cfs_period_us");
804
805                match (limit, period) {
806                    (Some(limit), Some(period)) if period > 0 => quota = quota.min(limit / period),
807                    _ => {}
808                }
809
810                path.pop();
811            }
812
813            // we passed the try_exists above so we should have traversed the correct hierarchy
814            // when reaching this line
815            break;
816        }
817
818        quota
819    }
820
821    /// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
822    ///
823    /// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
824    /// over the already-included prefix
825    fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
826        let mut reader = File::open_buffered("/proc/self/mountinfo").ok()?;
827        let mut line = String::with_capacity(256);
828        loop {
829            line.clear();
830            if reader.read_line(&mut line).ok()? == 0 {
831                break;
832            }
833
834            let line = line.trim();
835            let mut items = line.split(' ');
836
837            let sub_path = items.nth(3)?;
838            let mount_point = items.next()?;
839            let mount_opts = items.next_back()?;
840            let filesystem_type = items.nth_back(1)?;
841
842            if filesystem_type != "cgroup" || !mount_opts.split(',').any(|opt| opt == "cpu") {
843                // not a cgroup / not a cpu-controller
844                continue;
845            }
846
847            let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;
848
849            if !group_path.starts_with(sub_path) {
850                // this is a bind-mount and the bound subdirectory
851                // does not contain the cgroup this process belongs to
852                continue;
853            }
854
855            let trimmed_group_path = group_path.strip_prefix(sub_path).ok()?;
856
857            return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
858        }
859
860        None
861    }
862}
863
864// glibc >= 2.15 has a __pthread_get_minstack() function that returns
865// PTHREAD_STACK_MIN plus bytes needed for thread-local storage.
866// We need that information to avoid blowing up when a small stack
867// is created in an application with big thread-local storage requirements.
868// See #6233 for rationale and details.
869#[cfg(all(target_os = "linux", target_env = "gnu"))]
870unsafe fn min_stack_size(attr: *const libc::pthread_attr_t) -> usize {
871    // We use dlsym to avoid an ELF version dependency on GLIBC_PRIVATE. (#23628)
872    // We shouldn't really be using such an internal symbol, but there's currently
873    // no other way to account for the TLS size.
874    dlsym!(
875        fn __pthread_get_minstack(attr: *const libc::pthread_attr_t) -> libc::size_t;
876    );
877
878    match __pthread_get_minstack.get() {
879        None => libc::PTHREAD_STACK_MIN,
880        Some(f) => unsafe { f(attr) },
881    }
882}
883
// Outside of glibc there is no __pthread_get_minstack() worth looking up.
/// Returns the platform's `PTHREAD_STACK_MIN`; the attribute object is ignored.
#[cfg(all(
    not(all(target_os = "linux", target_env = "gnu")),
    not(any(target_os = "netbsd", target_os = "nuttx"))
))]
unsafe fn min_stack_size(_attr: *const libc::pthread_attr_t) -> usize {
    libc::PTHREAD_STACK_MIN
}
892
/// Returns the minimum thread stack size on NetBSD and NuttX.
///
/// The value comes from `sysconf(_SC_THREAD_STACK_MIN)` and is computed once
/// and cached; if `sysconf` reports a negative value, 2048 is used instead.
/// The attribute object is ignored.
#[cfg(any(target_os = "netbsd", target_os = "nuttx"))]
unsafe fn min_stack_size(_attr: *const libc::pthread_attr_t) -> usize {
    static CACHED_MIN: crate::sync::OnceLock<usize> = crate::sync::OnceLock::new();

    *CACHED_MIN.get_or_init(|| {
        let reported = unsafe { libc::sysconf(libc::_SC_THREAD_STACK_MIN) };
        if reported < 0 {
            2048 // just a guess
        } else {
            reported as usize
        }
    })
}