core/hint.rs
1#![stable(feature = "core_hint", since = "1.27.0")]
2
//! Hints to the compiler that affect how code should be emitted or optimized.
4//!
5//! Hints may be compile time or runtime.
6
7use crate::marker::Destruct;
8use crate::mem::MaybeUninit;
9use crate::{intrinsics, ub_checks};
10
11/// Informs the compiler that the site which is calling this function is not
12/// reachable, possibly enabling further optimizations.
13///
14/// # Safety
15///
16/// Reaching this function is *Undefined Behavior*.
17///
18/// As the compiler assumes that all forms of Undefined Behavior can never
19/// happen, it will eliminate all branches in the surrounding code that it can
20/// determine will invariably lead to a call to `unreachable_unchecked()`.
21///
22/// If the assumptions embedded in using this function turn out to be wrong -
23/// that is, if the site which is calling `unreachable_unchecked()` is actually
24/// reachable at runtime - the compiler may have generated nonsensical machine
25/// instructions for this situation, including in seemingly unrelated code,
26/// causing difficult-to-debug problems.
27///
28/// Use this function sparingly. Consider using the [`unreachable!`] macro,
29/// which may prevent some optimizations but will safely panic in case it is
30/// actually reached at runtime. Benchmark your code to find out if using
31/// `unreachable_unchecked()` comes with a performance benefit.
32///
33/// # Examples
34///
35/// `unreachable_unchecked()` can be used in situations where the compiler
36/// can't prove invariants that were previously established. Such situations
37/// have a higher chance of occurring if those invariants are upheld by
38/// external code that the compiler can't analyze.
39/// ```
40/// fn prepare_inputs(divisors: &mut Vec<u32>) {
41/// // Note to future-self when making changes: The invariant established
42/// // here is NOT checked in `do_computation()`; if this changes, you HAVE
43/// // to change `do_computation()`.
44/// divisors.retain(|divisor| *divisor != 0)
45/// }
46///
47/// /// # Safety
/// /// All elements of `divisors` must be non-zero.
49/// unsafe fn do_computation(i: u32, divisors: &[u32]) -> u32 {
50/// divisors.iter().fold(i, |acc, divisor| {
51/// // Convince the compiler that a division by zero can't happen here
52/// // and a check is not needed below.
53/// if *divisor == 0 {
54/// // Safety: `divisor` can't be zero because of `prepare_inputs`,
55/// // but the compiler does not know about this. We *promise*
56/// // that we always call `prepare_inputs`.
57/// unsafe { std::hint::unreachable_unchecked() }
58/// }
59/// // The compiler would normally introduce a check here that prevents
60/// // a division by zero. However, if `divisor` was zero, the branch
61/// // above would reach what we explicitly marked as unreachable.
62/// // The compiler concludes that `divisor` can't be zero at this point
63/// // and removes the - now proven useless - check.
64/// acc / divisor
65/// })
66/// }
67///
68/// let mut divisors = vec![2, 0, 4];
69/// prepare_inputs(&mut divisors);
70/// let result = unsafe {
/// // Safety: prepare_inputs() guarantees that all elements of divisors are non-zero
72/// do_computation(100, &divisors)
73/// };
74/// assert_eq!(result, 12);
75///
76/// ```
77///
78/// While using `unreachable_unchecked()` is perfectly sound in the following
79/// example, as the compiler is able to prove that a division by zero is not
80/// possible, benchmarking reveals that `unreachable_unchecked()` provides
81/// no benefit over using [`unreachable!`], while the latter does not introduce
82/// the possibility of Undefined Behavior.
83///
84/// ```
85/// fn div_1(a: u32, b: u32) -> u32 {
86/// use std::hint::unreachable_unchecked;
87///
88/// // `b.saturating_add(1)` is always positive (not zero),
89/// // hence `checked_div` will never return `None`.
90/// // Therefore, the else branch is unreachable.
91/// a.checked_div(b.saturating_add(1))
92/// .unwrap_or_else(|| unsafe { unreachable_unchecked() })
93/// }
94///
95/// assert_eq!(div_1(7, 0), 7);
96/// assert_eq!(div_1(9, 1), 4);
97/// assert_eq!(div_1(11, u32::MAX), 0);
98/// ```
#[inline]
#[stable(feature = "unreachable", since = "1.27.0")]
#[rustc_const_stable(feature = "const_unreachable_unchecked", since = "1.57.0")]
#[track_caller]
#[coverage(off)] // Ferrocene addition: this function breaks llvm-cov
#[ferrocene::prevalidated]
pub const unsafe fn unreachable_unchecked() -> ! {
    // When language UB checks are enabled, reaching this point is reported
    // via a diagnostic here instead of silently becoming UB (the `() => false`
    // condition can never hold, so the check fires on every reach).
    ub_checks::assert_unsafe_precondition!(
        check_language_ub,
        "hint::unreachable_unchecked must never be reached",
        () => false
    );
    // SAFETY: the safety contract for `intrinsics::unreachable` must
    // be upheld by the caller.
    unsafe { intrinsics::unreachable() }
}
115
116/// Makes a *soundness* promise to the compiler that `cond` holds.
117///
118/// This may allow the optimizer to simplify things, but it might also make the generated code
119/// slower. Either way, calling it will most likely make compilation take longer.
120///
121/// You may know this from other places as
122/// [`llvm.assume`](https://llvm.org/docs/LangRef.html#llvm-assume-intrinsic) or, in C,
123/// [`__builtin_assume`](https://clang.llvm.org/docs/LanguageExtensions.html#builtin-assume).
124///
125/// This promotes a correctness requirement to a soundness requirement. Don't do that without
126/// very good reason.
127///
128/// # Usage
129///
130/// This is a situational tool for micro-optimization, and is allowed to do nothing. Any use
131/// should come with a repeatable benchmark to show the value, with the expectation to drop it
132/// later should the optimizer get smarter and no longer need it.
133///
134/// The more complicated the condition, the less likely this is to be useful. For example,
135/// `assert_unchecked(foo.is_sorted())` is a complex enough value that the compiler is unlikely
136/// to be able to take advantage of it.
137///
138/// There's also no need to `assert_unchecked` basic properties of things. For example, the
139/// compiler already knows the range of `count_ones`, so there is no benefit to
140/// `let n = u32::count_ones(x); assert_unchecked(n <= u32::BITS);`.
141///
142/// `assert_unchecked` is logically equivalent to `if !cond { unreachable_unchecked(); }`. If
143/// ever you are tempted to write `assert_unchecked(false)`, you should instead use
144/// [`unreachable_unchecked()`] directly.
145///
146/// # Safety
147///
148/// `cond` must be `true`. It is immediate UB to call this with `false`.
149///
150/// # Example
151///
152/// ```
153/// use core::hint;
154///
155/// /// # Safety
156/// ///
157/// /// `p` must be nonnull and valid
158/// pub unsafe fn next_value(p: *const i32) -> i32 {
159/// // SAFETY: caller invariants guarantee that `p` is not null
160/// unsafe { hint::assert_unchecked(!p.is_null()) }
161///
162/// if p.is_null() {
163/// return -1;
164/// } else {
165/// // SAFETY: caller invariants guarantee that `p` is valid
166/// unsafe { *p + 1 }
167/// }
168/// }
169/// ```
170///
171/// Without the `assert_unchecked`, the above function produces the following with optimizations
172/// enabled:
173///
174/// ```asm
175/// next_value:
176/// test rdi, rdi
177/// je .LBB0_1
178/// mov eax, dword ptr [rdi]
179/// inc eax
180/// ret
181/// .LBB0_1:
182/// mov eax, -1
183/// ret
184/// ```
185///
186/// Adding the assertion allows the optimizer to remove the extra check:
187///
188/// ```asm
189/// next_value:
190/// mov eax, dword ptr [rdi]
191/// inc eax
192/// ret
193/// ```
194///
195/// This example is quite unlike anything that would be used in the real world: it is redundant
196/// to put an assertion right next to code that checks the same thing, and dereferencing a
197/// pointer already has the builtin assumption that it is nonnull. However, it illustrates the
198/// kind of changes the optimizer can make even when the behavior is less obviously related.
#[track_caller]
#[inline(always)]
#[doc(alias = "assume")]
#[stable(feature = "hint_assert_unchecked", since = "1.81.0")]
#[rustc_const_stable(feature = "hint_assert_unchecked", since = "1.81.0")]
#[ferrocene::prevalidated]
pub const unsafe fn assert_unchecked(cond: bool) {
    // SAFETY: The caller promised `cond` is true.
    unsafe {
        // When language UB checks are enabled, a false `cond` is reported
        // with a diagnostic here rather than reaching the `assume` intrinsic
        // with a violated precondition.
        ub_checks::assert_unsafe_precondition!(
            check_language_ub,
            "hint::assert_unchecked must never be called when the condition is false",
            (cond: bool = cond) => cond,
        );
        crate::intrinsics::assume(cond);
    }
}
216
217/// Emits a machine instruction to signal the processor that it is running in
218/// a busy-wait spin-loop ("spin lock").
219///
220/// Upon receiving the spin-loop signal the processor can optimize its behavior by,
221/// for example, saving power or switching hyper-threads.
222///
223/// This function is different from [`thread::yield_now`] which directly
224/// yields to the system's scheduler, whereas `spin_loop` does not interact
225/// with the operating system.
226///
227/// A common use case for `spin_loop` is implementing bounded optimistic
228/// spinning in a CAS loop in synchronization primitives. To avoid problems
229/// like priority inversion, it is strongly recommended that the spin loop is
230/// terminated after a finite amount of iterations and an appropriate blocking
231/// syscall is made.
232///
233/// **Note**: On platforms that do not support receiving spin-loop hints this
234/// function does not do anything at all.
235///
236/// # Examples
237///
238/// ```ignore-wasm
239/// use std::sync::atomic::{AtomicBool, Ordering};
240/// use std::sync::Arc;
241/// use std::{hint, thread};
242///
243/// // A shared atomic value that threads will use to coordinate
244/// let live = Arc::new(AtomicBool::new(false));
245///
246/// // In a background thread we'll eventually set the value
247/// let bg_work = {
248/// let live = live.clone();
249/// thread::spawn(move || {
250/// // Do some work, then make the value live
251/// do_some_work();
252/// live.store(true, Ordering::Release);
253/// })
254/// };
255///
256/// // Back on our current thread, we wait for the value to be set
257/// while !live.load(Ordering::Acquire) {
258/// // The spin loop is a hint to the CPU that we're waiting, but probably
259/// // not for very long
260/// hint::spin_loop();
261/// }
262///
263/// // The value is now set
264/// # fn do_some_work() {}
265/// do_some_work();
266/// bg_work.join()?;
267/// # Ok::<(), Box<dyn core::any::Any + Send + 'static>>(())
268/// ```
269///
270/// [`thread::yield_now`]: ../../std/thread/fn.yield_now.html
#[inline(always)]
#[stable(feature = "renamed_spin_loop", since = "1.49.0")]
pub fn spin_loop() {
    // Dispatch to the per-architecture spin-loop hint instruction; on targets
    // with no such instruction this compiles to nothing (the `_` arm).
    crate::cfg_select! {
        miri => {
            unsafe extern "Rust" {
                safe fn miri_spin_loop();
            }

            // Miri does support some of the intrinsics that are called below, but to guarantee
            // consistent behavior across targets, this custom function is used.
            miri_spin_loop();
        }
        target_arch = "x86" => {
            // SAFETY: the `cfg` attr ensures that we only execute this on x86 targets.
            crate::arch::x86::_mm_pause()
        }
        target_arch = "x86_64" => {
            // SAFETY: the `cfg` attr ensures that we only execute this on x86_64 targets.
            crate::arch::x86_64::_mm_pause()
        }
        target_arch = "riscv32" => crate::arch::riscv32::pause(),
        target_arch = "riscv64" => crate::arch::riscv64::pause(),
        any(target_arch = "aarch64", target_arch = "arm64ec") => {
            // SAFETY: the `cfg` attr ensures that we only execute this on aarch64
            // (or arm64ec) targets.
            unsafe { crate::arch::aarch64::__isb(crate::arch::aarch64::SY) }
        }
        all(
            target_arch = "arm",
            any(
                all(target_feature = "v6k", not(target_feature = "thumb-mode")),
                target_feature = "v6t2",
                all(target_feature = "v6", target_feature = "mclass"),
            )
        ) => {
            // SAFETY: the `cfg` attr ensures that we only execute this on arm
            // targets with support for this feature. On ARMv6 in Thumb
            // mode, T2 is required (see Arm DDI0406C Section A8.8.427),
            // otherwise ARMv6-M or ARMv6K is enough
            unsafe { crate::arch::arm::__yield() }
        }
        target_arch = "loongarch32" => crate::arch::loongarch32::ibar::<0>(),
        target_arch = "loongarch64" => crate::arch::loongarch64::ibar::<0>(),
        _ => { /* do nothing */ }
    }
}
317
318/// An identity function that *__hints__* to the compiler to be maximally pessimistic about what
319/// `black_box` could do.
320///
321/// Unlike [`std::convert::identity`], a Rust compiler is encouraged to assume that `black_box` can
322/// use `dummy` in any possible valid way that Rust code is allowed to without introducing undefined
323/// behavior in the calling code. This property makes `black_box` useful for writing code in which
324/// certain optimizations are not desired, such as benchmarks.
325///
326/// <div class="warning">
327///
328/// Note however, that `black_box` is only (and can only be) provided on a "best-effort" basis. The
329/// extent to which it can block optimisations may vary depending upon the platform and code-gen
330/// backend used. Programs cannot rely on `black_box` for *correctness*, beyond it behaving as the
331/// identity function. As such, it **must not be relied upon to control critical program behavior.**
332/// This also means that this function does not offer any guarantees for cryptographic or security
333/// purposes.
334///
335/// This limitation is not specific to `black_box`; there is no mechanism in the entire Rust
336/// language that can provide the guarantees required for constant-time cryptography.
337/// (There is also no such mechanism in LLVM, so the same is true for every other LLVM-based compiler.)
338///
339/// </div>
340///
341/// [`std::convert::identity`]: crate::convert::identity
342///
343/// # When is this useful?
344///
345/// While not suitable in those mission-critical cases, `black_box`'s functionality can generally be
346/// relied upon for benchmarking, and should be used there. It will try to ensure that the
347/// compiler doesn't optimize away part of the intended test code based on context. For
348/// example:
349///
350/// ```
351/// fn contains(haystack: &[&str], needle: &str) -> bool {
352/// haystack.iter().any(|x| x == &needle)
353/// }
354///
355/// pub fn benchmark() {
356/// let haystack = vec!["abc", "def", "ghi", "jkl", "mno"];
357/// let needle = "ghi";
358/// for _ in 0..10 {
359/// contains(&haystack, needle);
360/// }
361/// }
362/// ```
363///
364/// The compiler could theoretically make optimizations like the following:
365///
366/// - The `needle` and `haystack` do not change, move the call to `contains` outside the loop and
367/// delete the loop
368/// - Inline `contains`
369/// - `needle` and `haystack` have values known at compile time, `contains` is always true. Remove
370/// the call and replace with `true`
371/// - Nothing is done with the result of `contains`: delete this function call entirely
372/// - `benchmark` now has no purpose: delete this function
373///
374/// It is not likely that all of the above happens, but the compiler is definitely able to make some
375/// optimizations that could result in a very inaccurate benchmark. This is where `black_box` comes
376/// in:
377///
378/// ```
379/// use std::hint::black_box;
380///
381/// // Same `contains` function.
382/// fn contains(haystack: &[&str], needle: &str) -> bool {
383/// haystack.iter().any(|x| x == &needle)
384/// }
385///
386/// pub fn benchmark() {
387/// let haystack = vec!["abc", "def", "ghi", "jkl", "mno"];
388/// let needle = "ghi";
389/// for _ in 0..10 {
390/// // Force the compiler to run `contains`, even though it is a pure function whose
391/// // results are unused.
392/// black_box(contains(
393/// // Prevent the compiler from making assumptions about the input.
394/// black_box(&haystack),
395/// black_box(needle),
396/// ));
397/// }
398/// }
399/// ```
400///
401/// This essentially tells the compiler to block optimizations across any calls to `black_box`. So,
402/// it now:
403///
404/// - Treats both arguments to `contains` as unpredictable: the body of `contains` can no longer be
405/// optimized based on argument values
406/// - Treats the call to `contains` and its result as volatile: the body of `benchmark` cannot
407/// optimize this away
408///
409/// This makes our benchmark much more realistic to how the function would actually be used, where
410/// arguments are usually not known at compile time and the result is used in some way.
411///
412/// # How to use this
413///
414/// In practice, `black_box` serves two purposes:
415///
416/// 1. It prevents the compiler from making optimizations related to the value returned by `black_box`
417/// 2. It forces the value passed to `black_box` to be calculated, even if the return value of `black_box` is unused
418///
419/// ```
420/// use std::hint::black_box;
421///
422/// let zero = 0;
423/// let five = 5;
424///
425/// // The compiler will see this and remove the `* five` call, because it knows that multiplying
426/// // any integer by 0 will result in 0.
427/// let c = zero * five;
428///
429/// // Adding `black_box` here disables the compiler's ability to reason about the first operand in the multiplication.
430/// // It is forced to assume that it can be any possible number, so it cannot remove the `* five`
431/// // operation.
432/// let c = black_box(zero) * five;
433/// ```
434///
435/// While most cases will not be as clear-cut as the above example, it still illustrates how
436/// `black_box` can be used. When benchmarking a function, you usually want to wrap its inputs in
437/// `black_box` so the compiler cannot make optimizations that would be unrealistic in real-life
438/// use.
439///
440/// ```
441/// use std::hint::black_box;
442///
443/// // This is a simple function that increments its input by 1. Note that it is pure, meaning it
444/// // has no side-effects. This function has no effect if its result is unused. (An example of a
445/// // function *with* side-effects is `println!()`.)
446/// fn increment(x: u8) -> u8 {
447/// x + 1
448/// }
449///
450/// // Here, we call `increment` but discard its result. The compiler, seeing this and knowing that
451/// // `increment` is pure, will eliminate this function call entirely. This may not be desired,
452/// // though, especially if we're trying to track how much time `increment` takes to execute.
453/// let _ = increment(black_box(5));
454///
455/// // Here, we force `increment` to be executed. This is because the compiler treats `black_box`
456/// // as if it has side-effects, and thus must compute its input.
457/// let _ = black_box(increment(black_box(5)));
458/// ```
459///
460/// There may be additional situations where you want to wrap the result of a function in
461/// `black_box` to force its execution. This is situational though, and may not have any effect
462/// (such as when the function returns a zero-sized type such as [`()` unit][unit]).
463///
464/// Note that `black_box` has no effect on how its input is treated, only its output. As such,
465/// expressions passed to `black_box` may still be optimized:
466///
467/// ```
468/// use std::hint::black_box;
469///
470/// // The compiler sees this...
471/// let y = black_box(5 * 10);
472///
473/// // ...as this. As such, it will likely simplify `5 * 10` to just `50`.
474/// let _0 = 5 * 10;
475/// let y = black_box(_0);
476/// ```
477///
478/// In the above example, the `5 * 10` expression is considered distinct from the `black_box` call,
479/// and thus is still optimized by the compiler. You can prevent this by moving the multiplication
480/// operation outside of `black_box`:
481///
482/// ```
483/// use std::hint::black_box;
484///
485/// // No assumptions can be made about either operand, so the multiplication is not optimized out.
486/// let y = black_box(5) * black_box(10);
487/// ```
488///
489/// During constant evaluation, `black_box` is treated as a no-op.
#[inline]
#[stable(feature = "bench_black_box", since = "1.66.0")]
#[rustc_const_stable(feature = "const_black_box", since = "1.86.0")]
pub const fn black_box<T>(dummy: T) -> T {
    // Forward directly to the compiler intrinsic; the "assume anything could
    // happen here" pessimization is implemented entirely by codegen for
    // `intrinsics::black_box`, and it is a plain identity in const eval.
    crate::intrinsics::black_box(dummy)
}
496
497/// An identity function that causes an `unused_must_use` warning to be
498/// triggered if the given value is not used (returned, stored in a variable,
499/// etc) by the caller.
500///
501/// This is primarily intended for use in macro-generated code, in which a
502/// [`#[must_use]` attribute][must_use] either on a type or a function would not
503/// be convenient.
504///
505/// [must_use]: https://doc.rust-lang.org/reference/attributes/diagnostics.html#the-must_use-attribute
506///
507/// # Example
508///
509/// ```
510/// #![feature(hint_must_use)]
511///
512/// use core::fmt;
513///
514/// pub struct Error(/* ... */);
515///
516/// #[macro_export]
517/// macro_rules! make_error {
518/// ($($args:expr),*) => {
519/// core::hint::must_use({
520/// let error = make_error(core::format_args!($($args),*));
521/// error
522/// })
523/// };
524/// }
525///
526/// // Implementation detail of make_error! macro.
527/// #[doc(hidden)]
528/// pub fn make_error(args: fmt::Arguments<'_>) -> Error {
529/// Error(/* ... */)
530/// }
531///
532/// fn demo() -> Option<Error> {
533/// if true {
534/// // Oops, meant to write `return Some(make_error!("..."));`
535/// Some(make_error!("..."));
536/// }
537/// None
538/// }
539/// #
540/// # // Make rustdoc not wrap the whole snippet in fn main, so that $crate::make_error works
541/// # fn main() {}
542/// ```
543///
544/// In the above example, we'd like an `unused_must_use` lint to apply to the
545/// value created by `make_error!`. However, neither `#[must_use]` on a struct
546/// nor `#[must_use]` on a function is appropriate here, so the macro expands
547/// using `core::hint::must_use` instead.
548///
549/// - We wouldn't want `#[must_use]` on the `struct Error` because that would
550/// make the following unproblematic code trigger a warning:
551///
552/// ```
553/// # struct Error;
554/// #
555/// fn f(arg: &str) -> Result<(), Error>
556/// # { Ok(()) }
557///
558/// #[test]
559/// fn t() {
560/// // Assert that `f` returns error if passed an empty string.
561/// // A value of type `Error` is unused here but that's not a problem.
562/// f("").unwrap_err();
563/// }
564/// ```
565///
566/// - Using `#[must_use]` on `fn make_error` can't help because the return value
567/// *is* used, as the right-hand side of a `let` statement. The `let`
568/// statement looks useless but is in fact necessary for ensuring that
569/// temporaries within the `format_args` expansion are not kept alive past the
570/// creation of the `Error`, as keeping them alive past that point can cause
571/// autotrait issues in async code:
572///
573/// ```
574/// # #![feature(hint_must_use)]
575/// #
576/// # struct Error;
577/// #
578/// # macro_rules! make_error {
579/// # ($($args:expr),*) => {
580/// # core::hint::must_use({
581/// # // If `let` isn't used, then `f()` produces a non-Send future.
582/// # let error = make_error(core::format_args!($($args),*));
583/// # error
584/// # })
585/// # };
586/// # }
587/// #
588/// # fn make_error(args: core::fmt::Arguments<'_>) -> Error {
589/// # Error
590/// # }
591/// #
592/// async fn f() {
593/// // Using `let` inside the make_error expansion causes temporaries like
594/// // `unsync()` to drop at the semicolon of that `let` statement, which
595/// // is prior to the await point. They would otherwise stay around until
596/// // the semicolon on *this* statement, which is after the await point,
597/// // and the enclosing Future would not implement Send.
598/// log(make_error!("look: {:p}", unsync())).await;
599/// }
600///
601/// async fn log(error: Error) {/* ... */}
602///
603/// // Returns something without a Sync impl.
604/// fn unsync() -> *const () {
605/// 0 as *const ()
606/// }
607/// #
608/// # fn test() {
609/// # fn assert_send(_: impl Send) {}
610/// # assert_send(f());
611/// # }
612/// ```
#[unstable(feature = "hint_must_use", issue = "94745")]
#[must_use] // <-- :)
#[inline(always)]
pub const fn must_use<T>(value: T) -> T {
    // Plain identity at runtime; the `#[must_use]` attribute on this function
    // is what triggers the `unused_must_use` lint at the call site.
    value
}
619
620/// Hints to the compiler that a branch condition is likely to be true.
621/// Returns the value passed to it.
622///
623/// It can be used with `if` or boolean `match` expressions.
624///
625/// When used outside of a branch condition, it may still influence a nearby branch, but
626/// probably will not have any effect.
627///
628/// It can also be applied to parts of expressions, such as `likely(a) && unlikely(b)`, or to
629/// compound expressions, such as `likely(a && b)`. When applied to compound expressions, it has
630/// the following effect:
631/// ```text
632/// likely(!a) => !unlikely(a)
633/// likely(a && b) => likely(a) && likely(b)
634/// likely(a || b) => a || likely(b)
635/// ```
636///
637/// See also the function [`cold_path()`] which may be more appropriate for idiomatic Rust code.
638///
639/// # Examples
640///
641/// ```
642/// #![feature(likely_unlikely)]
643/// use core::hint::likely;
644///
645/// fn foo(x: i32) {
646/// if likely(x > 0) {
647/// println!("this branch is likely to be taken");
648/// } else {
649/// println!("this branch is unlikely to be taken");
650/// }
651///
652/// match likely(x > 0) {
653/// true => println!("this branch is likely to be taken"),
654/// false => println!("this branch is unlikely to be taken"),
655/// }
656///
657/// // Use outside of a branch condition may still influence a nearby branch
658/// let cond = likely(x != 0);
659/// if cond {
660/// println!("this branch is likely to be taken");
661/// }
662/// }
663/// ```
#[unstable(feature = "likely_unlikely", issue = "151619")]
#[inline(always)]
pub const fn likely(b: bool) -> bool {
    // Identity on the value; the intrinsic only communicates a branch-weight
    // hint to the code generator.
    crate::intrinsics::likely(b)
}
669
670/// Hints to the compiler that a branch condition is unlikely to be true.
671/// Returns the value passed to it.
672///
673/// It can be used with `if` or boolean `match` expressions.
674///
675/// When used outside of a branch condition, it may still influence a nearby branch, but
676/// probably will not have any effect.
677///
678/// It can also be applied to parts of expressions, such as `likely(a) && unlikely(b)`, or to
679/// compound expressions, such as `unlikely(a && b)`. When applied to compound expressions, it has
680/// the following effect:
681/// ```text
682/// unlikely(!a) => !likely(a)
683/// unlikely(a && b) => a && unlikely(b)
684/// unlikely(a || b) => unlikely(a) || unlikely(b)
685/// ```
686///
687/// See also the function [`cold_path()`] which may be more appropriate for idiomatic Rust code.
688///
689/// # Examples
690///
691/// ```
692/// #![feature(likely_unlikely)]
693/// use core::hint::unlikely;
694///
695/// fn foo(x: i32) {
696/// if unlikely(x > 0) {
697/// println!("this branch is unlikely to be taken");
698/// } else {
699/// println!("this branch is likely to be taken");
700/// }
701///
702/// match unlikely(x > 0) {
703/// true => println!("this branch is unlikely to be taken"),
704/// false => println!("this branch is likely to be taken"),
705/// }
706///
707/// // Use outside of a branch condition may still influence a nearby branch
708/// let cond = unlikely(x != 0);
709/// if cond {
710/// println!("this branch is likely to be taken");
711/// }
712/// }
713/// ```
#[unstable(feature = "likely_unlikely", issue = "151619")]
#[inline(always)]
pub const fn unlikely(b: bool) -> bool {
    // Identity on the value; the intrinsic only communicates a branch-weight
    // hint to the code generator.
    crate::intrinsics::unlikely(b)
}
719
720/// Hints to the compiler that given path is cold, i.e., unlikely to be taken. The compiler may
721/// choose to optimize paths that are not cold at the expense of paths that are cold.
722///
723/// Note that like all hints, the exact effect to codegen is not guaranteed. Using `cold_path`
724/// can actually *decrease* performance if the branch is called more than expected. It is advisable
725/// to perform benchmarks to tell if this function is useful.
726///
727/// # Examples
728///
729/// ```
730/// use core::hint::cold_path;
731///
732/// fn foo(x: &[i32]) {
733/// if let Some(first) = x.get(0) {
734/// // this is the fast path
735/// } else {
736/// // this path is unlikely
737/// cold_path();
738/// }
739/// }
740///
741/// fn bar(x: i32) -> i32 {
742/// match x {
743/// 1 => 10,
744/// 2 => 100,
745/// 3 => { cold_path(); 1000 }, // this branch is unlikely
746/// _ => { cold_path(); 10000 }, // this is also unlikely
747/// }
748/// }
749/// ```
750///
751/// This can also be used to implement `likely` and `unlikely` helpers to hint the condition rather
752/// than the branch:
753///
754/// ```
755/// use core::hint::cold_path;
756///
757/// #[inline(always)]
758/// pub const fn likely(b: bool) -> bool {
759/// if !b {
760/// cold_path();
761/// }
762/// b
763/// }
764///
765/// #[inline(always)]
766/// pub const fn unlikely(b: bool) -> bool {
767/// if b {
768/// cold_path();
769/// }
770/// b
771/// }
772///
773/// fn foo(x: i32) {
774/// if likely(x > 0) {
775/// println!("this branch is likely to be taken");
776/// } else {
777/// println!("this branch is unlikely to be taken");
778/// }
779/// }
780/// ```
#[stable(feature = "cold_path", since = "1.95.0")]
#[rustc_const_stable(feature = "cold_path", since = "1.95.0")]
#[inline(always)]
pub const fn cold_path() {
    // Marks the enclosing code path as cold for the optimizer; this is purely
    // a codegen hint with no runtime behavior of its own.
    crate::intrinsics::cold_path()
}
787
788/// Returns either `true_val` or `false_val` depending on the value of
789/// `condition`, with a hint to the compiler that `condition` is unlikely to be
790/// correctly predicted by a CPU’s branch predictor.
791///
792/// This method is functionally equivalent to
793/// ```ignore (this is just for illustrative purposes)
794/// fn select_unpredictable<T>(b: bool, true_val: T, false_val: T) -> T {
795/// if b { true_val } else { false_val }
796/// }
797/// ```
798/// but might generate different assembly. In particular, on platforms with
799/// a conditional move or select instruction (like `cmov` on x86 or `csel`
800/// on ARM) the optimizer might use these instructions to avoid branches,
801/// which can benefit performance if the branch predictor is struggling
802/// with predicting `condition`, such as in an implementation of binary
803/// search.
804///
805/// Note however that this lowering is not guaranteed (on any platform) and
806/// should not be relied upon when trying to write cryptographic constant-time
807/// code. Also be aware that this lowering might *decrease* performance if
808/// `condition` is well-predictable. It is advisable to perform benchmarks to
809/// tell if this function is useful.
810///
811/// # Examples
812///
813/// Distribute values evenly between two buckets:
814/// ```
815/// use std::hash::BuildHasher;
816/// use std::hint;
817///
818/// fn append<H: BuildHasher>(hasher: &H, v: i32, bucket_one: &mut Vec<i32>, bucket_two: &mut Vec<i32>) {
819/// let hash = hasher.hash_one(&v);
820/// let bucket = hint::select_unpredictable(hash % 2 == 0, bucket_one, bucket_two);
821/// bucket.push(v);
822/// }
823/// # let hasher = std::collections::hash_map::RandomState::new();
824/// # let mut bucket_one = Vec::new();
825/// # let mut bucket_two = Vec::new();
826/// # append(&hasher, 42, &mut bucket_one, &mut bucket_two);
827/// # assert_eq!(bucket_one.len() + bucket_two.len(), 1);
828/// ```
#[inline(always)]
#[stable(feature = "select_unpredictable", since = "1.88.0")]
#[rustc_const_unstable(feature = "const_select_unpredictable", issue = "145938")]
#[ferrocene::prevalidated]
pub const fn select_unpredictable<T>(condition: bool, true_val: T, false_val: T) -> T
where
    T: [const] Destruct,
{
    // Both values are wrapped in `MaybeUninit` so that ownership can be managed
    // manually below: the intrinsic only *selects* between them, it does not
    // drop the value that was not selected.
    // FIXME(https://github.com/rust-lang/unsafe-code-guidelines/issues/245):
    // Change this to use ManuallyDrop instead.
    let mut true_val = MaybeUninit::new(true_val);
    let mut false_val = MaybeUninit::new(false_val);

    // Panic guard: if dropping the unselected value panics, this guard drops
    // the selected value during unwinding so it is not leaked. On the normal
    // path the guard is defused with `mem::forget` below.
    #[ferrocene::prevalidated]
    struct DropOnPanic<T> {
        // Invariant: valid pointer and points to an initialized value that is not further used,
        // i.e. it can be dropped by this guard.
        inner: *mut T,
    }

    impl<T> Drop for DropOnPanic<T> {
        #[ferrocene::prevalidated]
        fn drop(&mut self) {
            // SAFETY: Must be guaranteed on construction of local type `DropOnPanic`.
            unsafe { self.inner.drop_in_place() }
        }
    }

    let true_ptr = true_val.as_mut_ptr();
    let false_ptr = false_val.as_mut_ptr();

    // SAFETY: The value that is not selected is dropped, and the selected one
    // is returned. This is necessary because the intrinsic doesn't drop the
    // value that is not selected.
    unsafe {
        // Extract the selected value first, ensure it is dropped as well if dropping the unselected
        // value panics. We construct a temporary by-pointer guard around the selected value while
        // dropping the unselected value. Arguments overlap here, so we can not use mutable
        // reference for these arguments.
        // (`drop` here is a raw pointer to the unselected value; it shadows `mem::drop`.)
        let guard = crate::intrinsics::select_unpredictable(condition, true_ptr, false_ptr);
        let drop = crate::intrinsics::select_unpredictable(condition, false_ptr, true_ptr);

        // SAFETY: both pointers are well-aligned and point to initialized values inside a
        // `MaybeUninit` each. In both possible values for `condition` the pointer `guard` and
        // `drop` do not alias (even though the two argument pairs we have selected from did alias
        // each other).
        let guard = DropOnPanic { inner: guard };
        drop.drop_in_place();
        // The unselected value was dropped without panicking; defuse the guard
        // so the selected value is not dropped as well.
        crate::mem::forget(guard);

        // Note that it is important to use the values here. Reading from the pointer we got makes
        // LLVM forget the !unpredictable annotation sometimes (in tests, integer sized values in
        // particular seemed to confuse it, also observed in llvm/llvm-project #82340).
        crate::intrinsics::select_unpredictable(condition, true_val, false_val).assume_init()
    }
}
885
/// The expected temporal locality of a memory prefetch operation.
///
/// Locality expresses how likely the prefetched data is to be reused soon,
/// and therefore which level of cache it should be brought into.
///
/// The locality is just a hint, and may be ignored on some targets or by the hardware.
///
/// Used with functions like [`prefetch_read`] and [`prefetch_write`].
///
/// [`prefetch_read`]: crate::hint::prefetch_read
/// [`prefetch_write`]: crate::hint::prefetch_write
#[unstable(feature = "hint_prefetch", issue = "146941")]
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
// Variants are ordered from the outermost to the innermost cache level;
// `Locality::to_llvm` maps them to the LLVM locality constants 1..=3.
pub enum Locality {
    /// Data is expected to be reused eventually.
    ///
    /// Typically prefetches into L3 cache (if the CPU supports it).
    L3,
    /// Data is expected to be reused in the near future.
    ///
    /// Typically prefetches into L2 cache.
    L2,
    /// Data is expected to be reused very soon.
    ///
    /// Typically prefetches into L1 cache.
    L1,
}
914
915impl Locality {
916 /// Convert to the constant that LLVM associates with a locality.
917 const fn to_llvm(self) -> i32 {
918 match self {
919 Self::L3 => 1,
920 Self::L2 => 2,
921 Self::L1 => 3,
922 }
923 }
924}
925
926/// Prefetch the cache line containing `ptr` for a future read.
927///
928/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
929/// soon after, but may also increase bandwidth usage or evict other cache lines.
930///
931/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
932///
933/// Passing a dangling or invalid pointer is permitted: the memory will not
934/// actually be dereferenced, and no faults are raised.
935///
936/// # Examples
937///
938/// ```
939/// #![feature(hint_prefetch)]
940/// use std::hint::{Locality, prefetch_read};
941/// use std::mem::size_of_val;
942///
943/// // Prefetch all of `slice` into the L1 cache.
944/// fn prefetch_slice<T>(slice: &[T]) {
945/// // On most systems the cache line size is 64 bytes.
946/// for offset in (0..size_of_val(slice)).step_by(64) {
947/// prefetch_read(slice.as_ptr().wrapping_add(offset), Locality::L1);
948/// }
949/// }
950/// ```
951#[inline(always)]
952#[unstable(feature = "hint_prefetch", issue = "146941")]
953pub const fn prefetch_read<T>(ptr: *const T, locality: Locality) {
954 match locality {
955 Locality::L3 => intrinsics::prefetch_read_data::<T, { Locality::L3.to_llvm() }>(ptr),
956 Locality::L2 => intrinsics::prefetch_read_data::<T, { Locality::L2.to_llvm() }>(ptr),
957 Locality::L1 => intrinsics::prefetch_read_data::<T, { Locality::L1.to_llvm() }>(ptr),
958 }
959}
960
/// Prefetch the cache line containing `ptr` for a single future read, but attempt to avoid
/// polluting the cache.
///
/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
/// soon after, but may also increase bandwidth usage or evict other cache lines.
///
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
///
/// Passing a dangling or invalid pointer is permitted: the memory will not
/// actually be dereferenced, and no faults are raised.
#[inline(always)]
#[unstable(feature = "hint_prefetch", issue = "146941")]
pub const fn prefetch_read_non_temporal<T>(ptr: *const T, locality: Locality) {
    // The LLVM intrinsic does not currently support specifying the locality.
    // Locality constant 0 selects the non-temporal variant; `locality` is
    // accepted only for API symmetry with `prefetch_read` and is ignored.
    let _ = locality;
    intrinsics::prefetch_read_data::<T, 0>(ptr)
}
978
979/// Prefetch the cache line containing `ptr` for a future write.
980///
981/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
982/// soon after, but may also increase bandwidth usage or evict other cache lines.
983///
984/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
985///
986/// Passing a dangling or invalid pointer is permitted: the memory will not
987/// actually be dereferenced, and no faults are raised.
988#[inline(always)]
989#[unstable(feature = "hint_prefetch", issue = "146941")]
990pub const fn prefetch_write<T>(ptr: *mut T, locality: Locality) {
991 match locality {
992 Locality::L3 => intrinsics::prefetch_write_data::<T, { Locality::L3.to_llvm() }>(ptr),
993 Locality::L2 => intrinsics::prefetch_write_data::<T, { Locality::L2.to_llvm() }>(ptr),
994 Locality::L1 => intrinsics::prefetch_write_data::<T, { Locality::L1.to_llvm() }>(ptr),
995 }
996}
997
/// Prefetch the cache line containing `ptr` for a single future write, but attempt to avoid
/// polluting the cache.
///
/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
/// soon after, but may also increase bandwidth usage or evict other cache lines.
///
/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
///
/// Passing a dangling or invalid pointer is permitted: the memory will not
/// actually be dereferenced, and no faults are raised.
#[inline(always)]
#[unstable(feature = "hint_prefetch", issue = "146941")]
// NOTE(review): this takes `*const T` while `prefetch_write` takes `*mut T` —
// confirm which signature is intended before stabilization; nothing is
// written through the pointer here, so `*const T` is sound either way.
pub const fn prefetch_write_non_temporal<T>(ptr: *const T, locality: Locality) {
    // The LLVM intrinsic does not currently support specifying the locality.
    // Locality constant 0 selects the non-temporal variant; `locality` is
    // accepted only for API symmetry with `prefetch_write` and is ignored.
    let _ = locality;
    intrinsics::prefetch_write_data::<T, 0>(ptr)
}
1015
1016/// Prefetch the cache line containing `ptr` into the instruction cache for a future read.
1017///
1018/// A strategically placed prefetch can reduce cache miss latency if the instructions are
1019/// accessed soon after, but may also increase bandwidth usage or evict other cache lines.
1020///
1021/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
1022///
1023/// Passing a dangling or invalid pointer is permitted: the memory will not
1024/// actually be dereferenced, and no faults are raised.
1025#[inline(always)]
1026#[unstable(feature = "hint_prefetch", issue = "146941")]
1027pub const fn prefetch_read_instruction<T>(ptr: *const T, locality: Locality) {
1028 match locality {
1029 Locality::L3 => intrinsics::prefetch_read_instruction::<T, { Locality::L3.to_llvm() }>(ptr),
1030 Locality::L2 => intrinsics::prefetch_read_instruction::<T, { Locality::L2.to_llvm() }>(ptr),
1031 Locality::L1 => intrinsics::prefetch_read_instruction::<T, { Locality::L1.to_llvm() }>(ptr),
1032 }
1033}