//! Helper trait for generic float types.
23use core::f64;
45use crate::fmt::{Debug, LowerExp};
6use crate::num::FpCategory;
7use crate::ops::{self, Add, Div, Mul, Neg};
/// Lossy `as` casting between two types.
///
/// Implementations are expected to behave like an `as` cast: a value that does not fit in `T`
/// is truncated or wrapped instead of being reported as an error.
pub trait CastInto<T: Copy>: Copy {
    /// Converts `self` into `T`, discarding whatever does not fit.
    fn cast(self) -> T;
}
1314/// Collection of traits that allow us to be generic over integer size.
15pub trait Integer:
16 Sized
17 + Clone
18 + Copy
19 + Debug
20 + ops::Shr<u32, Output = Self>
21 + ops::Shl<u32, Output = Self>
22 + ops::BitAnd<Output = Self>
23 + ops::BitOr<Output = Self>
24 + PartialEq
25 + CastInto<i16>
26{
27const ZERO: Self;
28const ONE: Self;
29}
3031macro_rules! int {
32 ($($ty:ty),+) => {
33 $(
34impl CastInto<i16> for $ty {
35fn cast(self) -> i16 {
36self as i16
37 }
38 }
3940impl Integer for $ty {
41const ZERO: Self = 0;
42const ONE: Self = 1;
43 }
44 )+
45 }
46}
4748int!(u16, u32, u64);
4950/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
51///
52/// See the parent module's doc comment for why this is necessary.
53///
54/// Should **never ever** be implemented for other types or be used outside the `dec2flt` module.
55#[doc(hidden)]
56pub trait RawFloat:
57 Sized
58 + Div<Output = Self>
59 + Neg<Output = Self>
60 + Mul<Output = Self>
61 + Add<Output = Self>
62 + LowerExp
63 + PartialEq
64 + PartialOrd
65 + Default
66 + Clone
67 + Copy
68 + Debug
69{
70/// The unsigned integer with the same size as the float
71type Int: Integer + Into<u64>;
7273/* general constants */
7475const INFINITY: Self;
76const NEG_INFINITY: Self;
77const NAN: Self;
78const NEG_NAN: Self;
7980/// Bit width of the float
81const BITS: u32;
8283/// The number of bits in the significand, *including* the hidden bit.
84const SIG_TOTAL_BITS: u32;
8586const EXP_MASK: Self::Int;
87const SIG_MASK: Self::Int;
8889/// The number of bits in the significand, *excluding* the hidden bit.
90const SIG_BITS: u32 = Self::SIG_TOTAL_BITS - 1;
9192/// Number of bits in the exponent.
93const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
9495/// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
96 /// representation.
97 ///
98 /// This shifted fully right, use `EXP_MASK` for the shifted value.
99const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
100101/// Signed version of `EXP_SAT` since we convert a lot.
102const INFINITE_POWER: i32 = Self::EXP_SAT as i32;
103104/// The exponent bias value. This is also the maximum value of the exponent.
105const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
106107/// Minimum exponent value of normal values.
108const EXP_MIN: i32 = -(Self::EXP_BIAS as i32 - 1);
109110/// Round-to-even only happens for negative values of q
111 /// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
112 /// the 32-bit case.
113 ///
114 /// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
115 /// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
116 /// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
117 ///
118 /// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
119 /// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
120 /// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
121 /// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
122 /// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
123 ///
124 /// Thus we have that we only need to round ties to even when
125 /// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
126 /// (in the 32-bit case). In both cases,the power of five(5^|q|)
127 /// fits in a 64-bit word.
128const MIN_EXPONENT_ROUND_TO_EVEN: i32;
129const MAX_EXPONENT_ROUND_TO_EVEN: i32;
130131/* limits related to Fast pathing */
132133/// Largest decimal exponent for a non-infinite value.
134 ///
135 /// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
136 /// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
137const LARGEST_POWER_OF_TEN: i32 = {
138let largest_pow2 = Self::EXP_BIAS + 1;
139 pow2_to_pow10(largest_pow2 as i64) as i32
140 };
141142/// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
143 /// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero.
144 ///
145 /// The smallest power of ten is represented by `⌊log10(2^-n / (2^64 - 1))⌋`, where `n` is
146 /// the smallest power of two. The `2^64 - 1)` denominator comes from the number of values
147 /// that are representable by the intermediate storage format. I don't actually know _why_
148 /// the storage format is relevant here.
149 ///
150 /// The values may be calculated using the formula. Unfortunately we cannot calculate them at
151 /// compile time since intermediates exceed the range of an `f64`.
152const SMALLEST_POWER_OF_TEN: i32;
153154/// Maximum exponent for a fast path case, or `⌊(SIG_BITS+1)/log2(5)⌋`
155// assuming FLT_EVAL_METHOD = 0
156const MAX_EXPONENT_FAST_PATH: i64 = {
157let log2_5 = f64::consts::LOG2_10 - 1.0;
158 (Self::SIG_TOTAL_BITS as f64 / log2_5) as i64
159 };
160161/// Minimum exponent for a fast path case, or `-⌊(SIG_BITS+1)/log2(5)⌋`
162const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH;
163164/// Maximum exponent that can be represented for a disguised-fast path case.
165 /// This is `MAX_EXPONENT_FAST_PATH + ⌊(SIG_BITS+1)/log2(10)⌋`
166const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 =
167Self::MAX_EXPONENT_FAST_PATH + (Self::SIG_TOTAL_BITS as f64 / f64::consts::LOG2_10) as i64;
168169/// Maximum mantissa for the fast-path (`1 << 53` for f64).
170const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::SIG_TOTAL_BITS;
171172/// Converts integer into float through an as cast.
173 /// This is only called in the fast-path algorithm, and therefore
174 /// will not lose precision, since the value will always have
175 /// only if the value is <= Self::MAX_MANTISSA_FAST_PATH.
176fn from_u64(v: u64) -> Self;
177178/// Performs a raw transmutation from an integer.
179fn from_u64_bits(v: u64) -> Self;
180181/// Gets a small power-of-ten for fast-path multiplication.
182fn pow10_fast_path(exponent: usize) -> Self;
183184/// Returns the category that this number falls into.
185fn classify(self) -> FpCategory;
186187/// Transmute to the integer representation
188fn to_bits(self) -> Self::Int;
189190/// Returns the mantissa, exponent and sign as integers.
191 ///
192 /// This returns `(m, p, s)` such that `s * m * 2^p` represents the original float. For 0, the
193 /// exponent will be `-(EXP_BIAS + SIG_BITS)`, which is the minimum subnormal power. For
194 /// infinity or NaN, the exponent will be `EXP_SAT - EXP_BIAS - SIG_BITS`.
195 ///
196 /// If subnormal, the mantissa will be shifted one bit to the left. Otherwise, it is returned
197 /// with the explicit bit set but otherwise unshifted
198 ///
199 /// `s` is only ever +/-1.
200fn integer_decode(self) -> (u64, i16, i8) {
201let bits = self.to_bits();
202let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
203let mut exponent: i16 = ((bits & Self::EXP_MASK) >> Self::SIG_BITS).cast();
204let mantissa = if exponent == 0 {
205 (bits & Self::SIG_MASK) << 1
206} else {
207 (bits & Self::SIG_MASK) | (Self::Int::ONE << Self::SIG_BITS)
208 };
209// Exponent bias + mantissa shift
210exponent -= (Self::EXP_BIAS + Self::SIG_BITS) as i16;
211 (mantissa.into(), exponent, sign)
212 }
213}
/// Solve for `b` in `10^b = 2^a`
///
/// The exact solution is `a / log2(10)`. The quotient is computed in `f64` and the final
/// `as i64` cast truncates it toward zero.
const fn pow2_to_pow10(a: i64) -> i64 {
    let res = (a as f64) / f64::consts::LOG2_10;
    res as i64
}
// Only compiled when the target reports reliable `f16` support.
#[cfg(target_has_reliable_f16)]
impl RawFloat for f16 {
    type Int = u16;

    const INFINITY: Self = Self::INFINITY;
    const NEG_INFINITY: Self = Self::NEG_INFINITY;
    const NAN: Self = Self::NAN;
    const NEG_NAN: Self = -Self::NAN;

    const BITS: u32 = 16;
    const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
    const EXP_MASK: Self::Int = Self::EXP_MASK;
    const SIG_MASK: Self::Int = Self::MAN_MASK;

    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -22;
    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 5;
    const SMALLEST_POWER_OF_TEN: i32 = -27;

    /// Converts through an `as` cast; debug builds check that `v` is within the fast-path
    /// mantissa limit.
    #[inline]
    fn from_u64(v: u64) -> Self {
        debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
        v as _
    }

    /// Reinterprets the low 16 bits of `v` as an `f16`; higher bits are ignored.
    #[inline]
    fn from_u64_bits(v: u64) -> Self {
        Self::from_bits((v & 0xFFFF) as u16)
    }

    /// Looks up `10^exponent` for exponents `0..=4`.
    ///
    /// The table is zero-padded to 8 entries and the index is masked with `& 7`, so the
    /// lookup always stays inside the table. Exponents 5, 6 and 7 return `0.0`, and larger
    /// values wrap around.
    fn pow10_fast_path(exponent: usize) -> Self {
        #[allow(clippy::use_self)]
        const TABLE: [f16; 8] = [1e0, 1e1, 1e2, 1e3, 1e4, 0.0, 0.0, 0.];
        TABLE[exponent & 7]
    }

    // The two methods below forward to the inherent methods of the same name.
    fn to_bits(self) -> Self::Int {
        self.to_bits()
    }

    fn classify(self) -> FpCategory {
        self.classify()
    }
}
264265impl RawFloat for f32 {
266type Int = u32;
267268const INFINITY: Self = f32::INFINITY;
269const NEG_INFINITY: Self = f32::NEG_INFINITY;
270const NAN: Self = f32::NAN;
271const NEG_NAN: Self = -f32::NAN;
272273const BITS: u32 = 32;
274const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
275const EXP_MASK: Self::Int = Self::EXP_MASK;
276const SIG_MASK: Self::Int = Self::MAN_MASK;
277278const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
279const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
280const SMALLEST_POWER_OF_TEN: i32 = -65;
281282#[inline]
283fn from_u64(v: u64) -> Self {
284debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
285 v as _
286}
287288#[inline]
289fn from_u64_bits(v: u64) -> Self {
290 f32::from_bits((v & 0xFFFFFFFF) as u32)
291 }
292293fn pow10_fast_path(exponent: usize) -> Self {
294#[allow(clippy::use_self)]
295const TABLE: [f32; 16] =
296 [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0.];
297 TABLE[exponent & 15]
298 }
299300fn to_bits(self) -> Self::Int {
301self.to_bits()
302 }
303304fn classify(self) -> FpCategory {
305self.classify()
306 }
307}
308309impl RawFloat for f64 {
310type Int = u64;
311312const INFINITY: Self = Self::INFINITY;
313const NEG_INFINITY: Self = Self::NEG_INFINITY;
314const NAN: Self = Self::NAN;
315const NEG_NAN: Self = -Self::NAN;
316317const BITS: u32 = 64;
318const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
319const EXP_MASK: Self::Int = Self::EXP_MASK;
320const SIG_MASK: Self::Int = Self::MAN_MASK;
321322const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
323const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
324const SMALLEST_POWER_OF_TEN: i32 = -342;
325326#[inline]
327fn from_u64(v: u64) -> Self {
328debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
329 v as _
330}
331332#[inline]
333fn from_u64_bits(v: u64) -> Self {
334 f64::from_bits(v)
335 }
336337fn pow10_fast_path(exponent: usize) -> Self {
338const TABLE: [f64; 32] = [
3391e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
3401e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 0., 0., 0., 0., 0., 0., 0., 0., 0.,
341 ];
342 TABLE[exponent & 31]
343 }
344345fn to_bits(self) -> Self::Int {
346self.to_bits()
347 }
348349fn classify(self) -> FpCategory {
350self.classify()
351 }
352}