half/
bfloat.rs

Help
1#[cfg(all(feature = "serde", feature = "alloc"))]
2#[allow(unused_imports)]
3use alloc::string::ToString;
4#[cfg(feature = "bytemuck")]
5use bytemuck::{Pod, Zeroable};
6use core::{
7    cmp::Ordering,
8    iter::{Product, Sum},
9    num::FpCategory,
10    ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign},
11};
12#[cfg(not(target_arch = "spirv"))]
13use core::{
14    fmt::{
15        Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
16    },
17    num::ParseFloatError,
18    str::FromStr,
19};
20#[cfg(feature = "serde")]
21use serde::{Deserialize, Serialize};
22#[cfg(feature = "zerocopy")]
23use zerocopy::{AsBytes, FromBytes};
24
25pub(crate) mod convert;
26
/// A 16-bit floating point type implementing the [`bfloat16`] format.
///
/// The [`bfloat16`] floating point format is a truncated 16-bit version of the IEEE 754 standard
/// `binary32`, a.k.a. [`f32`]. [`bf16`] has approximately the same dynamic range as [`f32`] by
/// having a lower precision than [`f16`][crate::f16]. While [`f16`][crate::f16] has a precision of
/// 11 bits, [`bf16`] has a precision of only 8 bits.
///
/// The value is stored as its raw 16-bit pattern (see [`bf16::from_bits`] and
/// [`bf16::to_bits`]). The derived [`Default`] produces the all-zero bit pattern, which is
/// [`bf16::ZERO`].
///
/// [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
// The type name is deliberately lower-case, so the naming lint is silenced.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Default)]
// Guarantees the same layout as the wrapped `u16`.
#[repr(transparent)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
#[cfg_attr(feature = "rkyv", archive(resolver = "Bf16Resolver"))]
#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))]
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
#[cfg_attr(kani, derive(kani::Arbitrary))]
pub struct bf16(u16);
48
49impl bf16 {
50    /// Constructs a [`bf16`] value from the raw bits.
51    #[inline]
52    #[must_use]
53    pub const fn from_bits(bits: u16) -> bf16 {
54        bf16(bits)
55    }
56
57    /// Constructs a [`bf16`] value from a 32-bit floating point value.
58    ///
59    /// This operation is lossy. If the 32-bit value is too large to fit, ±∞ will result. NaN values
60    /// are preserved. Subnormal values that are too tiny to be represented will result in ±0. All
61    /// other values are truncated and rounded to the nearest representable value.
62    #[inline]
63    #[must_use]
64    pub fn from_f32(value: f32) -> bf16 {
65        Self::from_f32_const(value)
66    }
67
68    /// Constructs a [`bf16`] value from a 32-bit floating point value.
69    ///
70    /// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware
71    /// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred
72    /// in any non-`const` context.
73    ///
74    /// This operation is lossy. If the 32-bit value is too large to fit, ±∞ will result. NaN values
75    /// are preserved. Subnormal values that are too tiny to be represented will result in ±0. All
76    /// other values are truncated and rounded to the nearest representable value.
77    #[inline]
78    #[must_use]
79    pub const fn from_f32_const(value: f32) -> bf16 {
80        bf16(convert::f32_to_bf16(value))
81    }
82
83    /// Constructs a [`bf16`] value from a 64-bit floating point value.
84    ///
85    /// This operation is lossy. If the 64-bit value is to large to fit, ±∞ will result. NaN values
86    /// are preserved. 64-bit subnormal values are too tiny to be represented and result in ±0.
87    /// Exponents that underflow the minimum exponent will result in subnormals or ±0. All other
88    /// values are truncated and rounded to the nearest representable value.
89    #[inline]
90    #[must_use]
91    pub fn from_f64(value: f64) -> bf16 {
92        Self::from_f64_const(value)
93    }
94
95    /// Constructs a [`bf16`] value from a 64-bit floating point value.
96    ///
97    /// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware
98    /// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred
99    /// in any non-`const` context.
100    ///
101    /// This operation is lossy. If the 64-bit value is to large to fit, ±∞ will result. NaN values
102    /// are preserved. 64-bit subnormal values are too tiny to be represented and result in ±0.
103    /// Exponents that underflow the minimum exponent will result in subnormals or ±0. All other
104    /// values are truncated and rounded to the nearest representable value.
105    #[inline]
106    #[must_use]
107    pub const fn from_f64_const(value: f64) -> bf16 {
108        bf16(convert::f64_to_bf16(value))
109    }
110
111    /// Converts a [`bf16`] into the underlying bit representation.
112    #[inline]
113    #[must_use]
114    pub const fn to_bits(self) -> u16 {
115        self.0
116    }
117
118    /// Returns the memory representation of the underlying bit representation as a byte array in
119    /// little-endian byte order.
120    ///
121    /// # Examples
122    ///
123    /// ```rust
124    /// # use half::prelude::*;
125    /// let bytes = bf16::from_f32(12.5).to_le_bytes();
126    /// assert_eq!(bytes, [0x48, 0x41]);
127    /// ```
128    #[inline]
129    #[must_use]
130    pub const fn to_le_bytes(self) -> [u8; 2] {
131        self.0.to_le_bytes()
132    }
133
134    /// Returns the memory representation of the underlying bit representation as a byte array in
135    /// big-endian (network) byte order.
136    ///
137    /// # Examples
138    ///
139    /// ```rust
140    /// # use half::prelude::*;
141    /// let bytes = bf16::from_f32(12.5).to_be_bytes();
142    /// assert_eq!(bytes, [0x41, 0x48]);
143    /// ```
144    #[inline]
145    #[must_use]
146    pub const fn to_be_bytes(self) -> [u8; 2] {
147        self.0.to_be_bytes()
148    }
149
150    /// Returns the memory representation of the underlying bit representation as a byte array in
151    /// native byte order.
152    ///
153    /// As the target platform's native endianness is used, portable code should use
154    /// [`to_be_bytes`][bf16::to_be_bytes] or [`to_le_bytes`][bf16::to_le_bytes], as appropriate,
155    /// instead.
156    ///
157    /// # Examples
158    ///
159    /// ```rust
160    /// # use half::prelude::*;
161    /// let bytes = bf16::from_f32(12.5).to_ne_bytes();
162    /// assert_eq!(bytes, if cfg!(target_endian = "big") {
163    ///     [0x41, 0x48]
164    /// } else {
165    ///     [0x48, 0x41]
166    /// });
167    /// ```
168    #[inline]
169    #[must_use]
170    pub const fn to_ne_bytes(self) -> [u8; 2] {
171        self.0.to_ne_bytes()
172    }
173
174    /// Creates a floating point value from its representation as a byte array in little endian.
175    ///
176    /// # Examples
177    ///
178    /// ```rust
179    /// # use half::prelude::*;
180    /// let value = bf16::from_le_bytes([0x48, 0x41]);
181    /// assert_eq!(value, bf16::from_f32(12.5));
182    /// ```
183    #[inline]
184    #[must_use]
185    pub const fn from_le_bytes(bytes: [u8; 2]) -> bf16 {
186        bf16::from_bits(u16::from_le_bytes(bytes))
187    }
188
189    /// Creates a floating point value from its representation as a byte array in big endian.
190    ///
191    /// # Examples
192    ///
193    /// ```rust
194    /// # use half::prelude::*;
195    /// let value = bf16::from_be_bytes([0x41, 0x48]);
196    /// assert_eq!(value, bf16::from_f32(12.5));
197    /// ```
198    #[inline]
199    #[must_use]
200    pub const fn from_be_bytes(bytes: [u8; 2]) -> bf16 {
201        bf16::from_bits(u16::from_be_bytes(bytes))
202    }
203
204    /// Creates a floating point value from its representation as a byte array in native endian.
205    ///
206    /// As the target platform's native endianness is used, portable code likely wants to use
207    /// [`from_be_bytes`][bf16::from_be_bytes] or [`from_le_bytes`][bf16::from_le_bytes], as
208    /// appropriate instead.
209    ///
210    /// # Examples
211    ///
212    /// ```rust
213    /// # use half::prelude::*;
214    /// let value = bf16::from_ne_bytes(if cfg!(target_endian = "big") {
215    ///     [0x41, 0x48]
216    /// } else {
217    ///     [0x48, 0x41]
218    /// });
219    /// assert_eq!(value, bf16::from_f32(12.5));
220    /// ```
221    #[inline]
222    #[must_use]
223    pub const fn from_ne_bytes(bytes: [u8; 2]) -> bf16 {
224        bf16::from_bits(u16::from_ne_bytes(bytes))
225    }
226
227    /// Converts a [`bf16`] value into an [`f32`] value.
228    ///
229    /// This conversion is lossless as all values can be represented exactly in [`f32`].
230    #[inline]
231    #[must_use]
232    pub fn to_f32(self) -> f32 {
233        self.to_f32_const()
234    }
235
236    /// Converts a [`bf16`] value into an [`f32`] value.
237    ///
238    /// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware
239    /// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred
240    /// in any non-`const` context.
241    ///
242    /// This conversion is lossless as all values can be represented exactly in [`f32`].
243    #[inline]
244    #[must_use]
245    pub const fn to_f32_const(self) -> f32 {
246        convert::bf16_to_f32(self.0)
247    }
248
249    /// Converts a [`bf16`] value into an [`f64`] value.
250    ///
251    /// This conversion is lossless as all values can be represented exactly in [`f64`].
252    #[inline]
253    #[must_use]
254    pub fn to_f64(self) -> f64 {
255        self.to_f64_const()
256    }
257
258    /// Converts a [`bf16`] value into an [`f64`] value.
259    ///
260    /// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware
261    /// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred
262    /// in any non-`const` context.
263    ///
264    /// This conversion is lossless as all values can be represented exactly in [`f64`].
265    #[inline]
266    #[must_use]
267    pub const fn to_f64_const(self) -> f64 {
268        convert::bf16_to_f64(self.0)
269    }
270
271    /// Returns `true` if this value is NaN and `false` otherwise.
272    ///
273    /// # Examples
274    ///
275    /// ```rust
276    /// # use half::prelude::*;
277    ///
278    /// let nan = bf16::NAN;
279    /// let f = bf16::from_f32(7.0_f32);
280    ///
281    /// assert!(nan.is_nan());
282    /// assert!(!f.is_nan());
283    /// ```
284    #[inline]
285    #[must_use]
286    pub const fn is_nan(self) -> bool {
287        self.0 & 0x7FFFu16 > 0x7F80u16
288    }
289
290    /// Returns `true` if this value is ±∞ and `false` otherwise.
291    ///
292    /// # Examples
293    ///
294    /// ```rust
295    /// # use half::prelude::*;
296    ///
297    /// let f = bf16::from_f32(7.0f32);
298    /// let inf = bf16::INFINITY;
299    /// let neg_inf = bf16::NEG_INFINITY;
300    /// let nan = bf16::NAN;
301    ///
302    /// assert!(!f.is_infinite());
303    /// assert!(!nan.is_infinite());
304    ///
305    /// assert!(inf.is_infinite());
306    /// assert!(neg_inf.is_infinite());
307    /// ```
308    #[inline]
309    #[must_use]
310    pub const fn is_infinite(self) -> bool {
311        self.0 & 0x7FFFu16 == 0x7F80u16
312    }
313
314    /// Returns `true` if this number is neither infinite nor NaN.
315    ///
316    /// # Examples
317    ///
318    /// ```rust
319    /// # use half::prelude::*;
320    ///
321    /// let f = bf16::from_f32(7.0f32);
322    /// let inf = bf16::INFINITY;
323    /// let neg_inf = bf16::NEG_INFINITY;
324    /// let nan = bf16::NAN;
325    ///
326    /// assert!(f.is_finite());
327    ///
328    /// assert!(!nan.is_finite());
329    /// assert!(!inf.is_finite());
330    /// assert!(!neg_inf.is_finite());
331    /// ```
332    #[inline]
333    #[must_use]
334    pub const fn is_finite(self) -> bool {
335        self.0 & 0x7F80u16 != 0x7F80u16
336    }
337
338    /// Returns `true` if the number is neither zero, infinite, subnormal, or NaN.
339    ///
340    /// # Examples
341    ///
342    /// ```rust
343    /// # use half::prelude::*;
344    ///
345    /// let min = bf16::MIN_POSITIVE;
346    /// let max = bf16::MAX;
347    /// let lower_than_min = bf16::from_f32(1.0e-39_f32);
348    /// let zero = bf16::from_f32(0.0_f32);
349    ///
350    /// assert!(min.is_normal());
351    /// assert!(max.is_normal());
352    ///
353    /// assert!(!zero.is_normal());
354    /// assert!(!bf16::NAN.is_normal());
355    /// assert!(!bf16::INFINITY.is_normal());
356    /// // Values between 0 and `min` are subnormal.
357    /// assert!(!lower_than_min.is_normal());
358    /// ```
359    #[inline]
360    #[must_use]
361    pub const fn is_normal(self) -> bool {
362        let exp = self.0 & 0x7F80u16;
363        exp != 0x7F80u16 && exp != 0
364    }
365
366    /// Returns the floating point category of the number.
367    ///
368    /// If only one property is going to be tested, it is generally faster to use the specific
369    /// predicate instead.
370    ///
371    /// # Examples
372    ///
373    /// ```rust
374    /// use std::num::FpCategory;
375    /// # use half::prelude::*;
376    ///
377    /// let num = bf16::from_f32(12.4_f32);
378    /// let inf = bf16::INFINITY;
379    ///
380    /// assert_eq!(num.classify(), FpCategory::Normal);
381    /// assert_eq!(inf.classify(), FpCategory::Infinite);
382    /// ```
383    #[must_use]
384    pub const fn classify(self) -> FpCategory {
385        let exp = self.0 & 0x7F80u16;
386        let man = self.0 & 0x007Fu16;
387        match (exp, man) {
388            (0, 0) => FpCategory::Zero,
389            (0, _) => FpCategory::Subnormal,
390            (0x7F80u16, 0) => FpCategory::Infinite,
391            (0x7F80u16, _) => FpCategory::Nan,
392            _ => FpCategory::Normal,
393        }
394    }
395
396    /// Returns a number that represents the sign of `self`.
397    ///
398    /// * 1.0 if the number is positive, +0.0 or [`INFINITY`][bf16::INFINITY]
399    /// * −1.0 if the number is negative, −0.0` or [`NEG_INFINITY`][bf16::NEG_INFINITY]
400    /// * [`NAN`][bf16::NAN] if the number is NaN
401    ///
402    /// # Examples
403    ///
404    /// ```rust
405    /// # use half::prelude::*;
406    ///
407    /// let f = bf16::from_f32(3.5_f32);
408    ///
409    /// assert_eq!(f.signum(), bf16::from_f32(1.0));
410    /// assert_eq!(bf16::NEG_INFINITY.signum(), bf16::from_f32(-1.0));
411    ///
412    /// assert!(bf16::NAN.signum().is_nan());
413    /// ```
414    #[must_use]
415    pub const fn signum(self) -> bf16 {
416        if self.is_nan() {
417            self
418        } else if self.0 & 0x8000u16 != 0 {
419            Self::NEG_ONE
420        } else {
421            Self::ONE
422        }
423    }
424
425    /// Returns `true` if and only if `self` has a positive sign, including +0.0, NaNs with a
426    /// positive sign bit and +∞.
427    ///
428    /// # Examples
429    ///
430    /// ```rust
431    /// # use half::prelude::*;
432    ///
433    /// let nan = bf16::NAN;
434    /// let f = bf16::from_f32(7.0_f32);
435    /// let g = bf16::from_f32(-7.0_f32);
436    ///
437    /// assert!(f.is_sign_positive());
438    /// assert!(!g.is_sign_positive());
439    /// // NaN can be either positive or negative
440    /// assert!(nan.is_sign_positive() != nan.is_sign_negative());
441    /// ```
442    #[inline]
443    #[must_use]
444    pub const fn is_sign_positive(self) -> bool {
445        self.0 & 0x8000u16 == 0
446    }
447
448    /// Returns `true` if and only if `self` has a negative sign, including −0.0, NaNs with a
449    /// negative sign bit and −∞.
450    ///
451    /// # Examples
452    ///
453    /// ```rust
454    /// # use half::prelude::*;
455    ///
456    /// let nan = bf16::NAN;
457    /// let f = bf16::from_f32(7.0f32);
458    /// let g = bf16::from_f32(-7.0f32);
459    ///
460    /// assert!(!f.is_sign_negative());
461    /// assert!(g.is_sign_negative());
462    /// // NaN can be either positive or negative
463    /// assert!(nan.is_sign_positive() != nan.is_sign_negative());
464    /// ```
465    #[inline]
466    #[must_use]
467    pub const fn is_sign_negative(self) -> bool {
468        self.0 & 0x8000u16 != 0
469    }
470
471    /// Returns a number composed of the magnitude of `self` and the sign of `sign`.
472    ///
473    /// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`.
474    /// If `self` is NaN, then NaN with the sign of `sign` is returned.
475    ///
476    /// # Examples
477    ///
478    /// ```
479    /// # use half::prelude::*;
480    /// let f = bf16::from_f32(3.5);
481    ///
482    /// assert_eq!(f.copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5));
483    /// assert_eq!(f.copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5));
484    /// assert_eq!((-f).copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5));
485    /// assert_eq!((-f).copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5));
486    ///
487    /// assert!(bf16::NAN.copysign(bf16::from_f32(1.0)).is_nan());
488    /// ```
489    #[inline]
490    #[must_use]
491    pub const fn copysign(self, sign: bf16) -> bf16 {
492        bf16((sign.0 & 0x8000u16) | (self.0 & 0x7FFFu16))
493    }
494
495    /// Returns the maximum of the two numbers.
496    ///
497    /// If one of the arguments is NaN, then the other argument is returned.
498    ///
499    /// # Examples
500    ///
501    /// ```
502    /// # use half::prelude::*;
503    /// let x = bf16::from_f32(1.0);
504    /// let y = bf16::from_f32(2.0);
505    ///
506    /// assert_eq!(x.max(y), y);
507    /// ```
508    #[inline]
509    #[must_use]
510    pub fn max(self, other: bf16) -> bf16 {
511        if other > self && !other.is_nan() {
512            other
513        } else {
514            self
515        }
516    }
517
518    /// Returns the minimum of the two numbers.
519    ///
520    /// If one of the arguments is NaN, then the other argument is returned.
521    ///
522    /// # Examples
523    ///
524    /// ```
525    /// # use half::prelude::*;
526    /// let x = bf16::from_f32(1.0);
527    /// let y = bf16::from_f32(2.0);
528    ///
529    /// assert_eq!(x.min(y), x);
530    /// ```
531    #[inline]
532    #[must_use]
533    pub fn min(self, other: bf16) -> bf16 {
534        if other < self && !other.is_nan() {
535            other
536        } else {
537            self
538        }
539    }
540
541    /// Restrict a value to a certain interval unless it is NaN.
542    ///
543    /// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`.
544    /// Otherwise this returns `self`.
545    ///
546    /// Note that this function returns NaN if the initial value was NaN as well.
547    ///
548    /// # Panics
549    /// Panics if `min > max`, `min` is NaN, or `max` is NaN.
550    ///
551    /// # Examples
552    ///
553    /// ```
554    /// # use half::prelude::*;
555    /// assert!(bf16::from_f32(-3.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(-2.0));
556    /// assert!(bf16::from_f32(0.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(0.0));
557    /// assert!(bf16::from_f32(2.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(1.0));
558    /// assert!(bf16::NAN.clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)).is_nan());
559    /// ```
560    #[inline]
561    #[must_use]
562    pub fn clamp(self, min: bf16, max: bf16) -> bf16 {
563        assert!(min <= max);
564        let mut x = self;
565        if x < min {
566            x = min;
567        }
568        if x > max {
569            x = max;
570        }
571        x
572    }
573
574    /// Returns the ordering between `self` and `other`.
575    ///
576    /// Unlike the standard partial comparison between floating point numbers,
577    /// this comparison always produces an ordering in accordance to
578    /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision)
579    /// floating point standard. The values are ordered in the following sequence:
580    ///
581    /// - negative quiet NaN
582    /// - negative signaling NaN
583    /// - negative infinity
584    /// - negative numbers
585    /// - negative subnormal numbers
586    /// - negative zero
587    /// - positive zero
588    /// - positive subnormal numbers
589    /// - positive numbers
590    /// - positive infinity
591    /// - positive signaling NaN
592    /// - positive quiet NaN.
593    ///
594    /// The ordering established by this function does not always agree with the
595    /// [`PartialOrd`] and [`PartialEq`] implementations of `bf16`. For example,
596    /// they consider negative and positive zero equal, while `total_cmp`
597    /// doesn't.
598    ///
599    /// The interpretation of the signaling NaN bit follows the definition in
600    /// the IEEE 754 standard, which may not match the interpretation by some of
601    /// the older, non-conformant (e.g. MIPS) hardware implementations.
602    ///
603    /// # Examples
604    /// ```
605    /// # use half::bf16;
606    /// let mut v: Vec<bf16> = vec![];
607    /// v.push(bf16::ONE);
608    /// v.push(bf16::INFINITY);
609    /// v.push(bf16::NEG_INFINITY);
610    /// v.push(bf16::NAN);
611    /// v.push(bf16::MAX_SUBNORMAL);
612    /// v.push(-bf16::MAX_SUBNORMAL);
613    /// v.push(bf16::ZERO);
614    /// v.push(bf16::NEG_ZERO);
615    /// v.push(bf16::NEG_ONE);
616    /// v.push(bf16::MIN_POSITIVE);
617    ///
618    /// v.sort_by(|a, b| a.total_cmp(&b));
619    ///
620    /// assert!(v
621    ///     .into_iter()
622    ///     .zip(
623    ///         [
624    ///             bf16::NEG_INFINITY,
625    ///             bf16::NEG_ONE,
626    ///             -bf16::MAX_SUBNORMAL,
627    ///             bf16::NEG_ZERO,
628    ///             bf16::ZERO,
629    ///             bf16::MAX_SUBNORMAL,
630    ///             bf16::MIN_POSITIVE,
631    ///             bf16::ONE,
632    ///             bf16::INFINITY,
633    ///             bf16::NAN
634    ///         ]
635    ///         .iter()
636    ///     )
637    ///     .all(|(a, b)| a.to_bits() == b.to_bits()));
638    /// ```
639    // Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp
640    #[inline]
641    #[must_use]
642    pub fn total_cmp(&self, other: &Self) -> Ordering {
643        let mut left = self.to_bits() as i16;
644        let mut right = other.to_bits() as i16;
645        left ^= (((left >> 15) as u16) >> 1) as i16;
646        right ^= (((right >> 15) as u16) >> 1) as i16;
647        left.cmp(&right)
648    }
649
650    /// Alternate serialize adapter for serializing as a float.
651    ///
652    /// By default, [`bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize
653    /// implementation that serializes as an [`f32`] value. It is designed for use with
654    /// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by
655    /// the default deserialize implementation.
656    ///
657    /// # Examples
658    ///
659    /// A demonstration on how to use this adapater:
660    ///
661    /// ```
662    /// use serde::{Serialize, Deserialize};
663    /// use half::bf16;
664    ///
665    /// #[derive(Serialize, Deserialize)]
666    /// struct MyStruct {
667    ///     #[serde(serialize_with = "bf16::serialize_as_f32")]
668    ///     value: bf16 // Will be serialized as f32 instead of u16
669    /// }
670    /// ```
671    #[cfg(feature = "serde")]
672    pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
673        serializer.serialize_f32(self.to_f32())
674    }
675
676    /// Alternate serialize adapter for serializing as a string.
677    ///
678    /// By default, [`bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize
679    /// implementation that serializes as a string value. It is designed for use with
680    /// `serialize_with` serde attributes. Deserialization from string values is already supported
681    /// by the default deserialize implementation.
682    ///
683    /// # Examples
684    ///
685    /// A demonstration on how to use this adapater:
686    ///
687    /// ```
688    /// use serde::{Serialize, Deserialize};
689    /// use half::bf16;
690    ///
691    /// #[derive(Serialize, Deserialize)]
692    /// struct MyStruct {
693    ///     #[serde(serialize_with = "bf16::serialize_as_string")]
694    ///     value: bf16 // Will be serialized as a string instead of u16
695    /// }
696    /// ```
697    #[cfg(all(feature = "serde", feature = "alloc"))]
698    pub fn serialize_as_string<S: serde::Serializer>(
699        &self,
700        serializer: S,
701    ) -> Result<S::Ok, S::Error> {
702        serializer.serialize_str(&self.to_string())
703    }
704
    /// Approximate number of [`bf16`] significant digits in base 10
    pub const DIGITS: u32 = 2;
    /// [`bf16`]
    /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value
    ///
    /// This is the difference between 1.0 and the next largest representable number.
    pub const EPSILON: bf16 = bf16(0x3C00u16);
    /// [`bf16`] positive Infinity (+∞)
    pub const INFINITY: bf16 = bf16(0x7F80u16);
    /// Number of [`bf16`] significant digits in base 2
    pub const MANTISSA_DIGITS: u32 = 8;
    /// Largest finite [`bf16`] value
    pub const MAX: bf16 = bf16(0x7F7F);
    /// Maximum possible [`bf16`] power of 10 exponent
    pub const MAX_10_EXP: i32 = 38;
    /// Maximum possible [`bf16`] power of 2 exponent
    pub const MAX_EXP: i32 = 128;
    /// Smallest finite [`bf16`] value
    ///
    /// This is the most negative finite value: the bit pattern of [`MAX`][bf16::MAX] with
    /// the sign bit set. It is not the smallest positive value; see
    /// [`MIN_POSITIVE`][bf16::MIN_POSITIVE] for that.
    pub const MIN: bf16 = bf16(0xFF7F);
    /// Minimum possible normal [`bf16`] power of 10 exponent
    pub const MIN_10_EXP: i32 = -37;
    /// One greater than the minimum possible normal [`bf16`] power of 2 exponent
    pub const MIN_EXP: i32 = -125;
    /// Smallest positive normal [`bf16`] value
    pub const MIN_POSITIVE: bf16 = bf16(0x0080u16);
    /// [`bf16`] Not a Number (NaN)
    pub const NAN: bf16 = bf16(0x7FC0u16);
    /// [`bf16`] negative infinity (-∞).
    pub const NEG_INFINITY: bf16 = bf16(0xFF80u16);
    /// The radix or base of the internal representation of [`bf16`]
    pub const RADIX: u32 = 2;

    /// Minimum positive subnormal [`bf16`] value
    pub const MIN_POSITIVE_SUBNORMAL: bf16 = bf16(0x0001u16);
    /// Maximum subnormal [`bf16`] value
    pub const MAX_SUBNORMAL: bf16 = bf16(0x007Fu16);

    /// [`bf16`] 1
    pub const ONE: bf16 = bf16(0x3F80u16);
    /// [`bf16`] 0
    pub const ZERO: bf16 = bf16(0x0000u16);
    /// [`bf16`] -0
    pub const NEG_ZERO: bf16 = bf16(0x8000u16);
    /// [`bf16`] -1
    pub const NEG_ONE: bf16 = bf16(0xBF80u16);

    // The mathematical constants below are stored as raw bit patterns.

    /// [`bf16`] Euler's number (ℯ)
    pub const E: bf16 = bf16(0x402Eu16);
    /// [`bf16`] Archimedes' constant (π)
    pub const PI: bf16 = bf16(0x4049u16);
    /// [`bf16`] 1/π
    pub const FRAC_1_PI: bf16 = bf16(0x3EA3u16);
    /// [`bf16`] 1/√2
    pub const FRAC_1_SQRT_2: bf16 = bf16(0x3F35u16);
    /// [`bf16`] 2/π
    pub const FRAC_2_PI: bf16 = bf16(0x3F23u16);
    /// [`bf16`] 2/√π
    pub const FRAC_2_SQRT_PI: bf16 = bf16(0x3F90u16);
    /// [`bf16`] π/2
    pub const FRAC_PI_2: bf16 = bf16(0x3FC9u16);
    /// [`bf16`] π/3
    pub const FRAC_PI_3: bf16 = bf16(0x3F86u16);
    /// [`bf16`] π/4
    pub const FRAC_PI_4: bf16 = bf16(0x3F49u16);
    /// [`bf16`] π/6
    pub const FRAC_PI_6: bf16 = bf16(0x3F06u16);
    /// [`bf16`] π/8
    pub const FRAC_PI_8: bf16 = bf16(0x3EC9u16);
    /// [`bf16`] 𝗅𝗇 10
    pub const LN_10: bf16 = bf16(0x4013u16);
    /// [`bf16`] 𝗅𝗇 2
    pub const LN_2: bf16 = bf16(0x3F31u16);
    /// [`bf16`] 𝗅𝗈𝗀₁₀ℯ
    pub const LOG10_E: bf16 = bf16(0x3EDEu16);
    /// [`bf16`] 𝗅𝗈𝗀₁₀2
    pub const LOG10_2: bf16 = bf16(0x3E9Au16);
    /// [`bf16`] 𝗅𝗈𝗀₂ℯ
    pub const LOG2_E: bf16 = bf16(0x3FB9u16);
    /// [`bf16`] 𝗅𝗈𝗀₂10
    pub const LOG2_10: bf16 = bf16(0x4055u16);
    /// [`bf16`] √2
    pub const SQRT_2: bf16 = bf16(0x3FB5u16);
787}
788
789impl From<bf16> for f32 {
790    #[inline]
791    fn from(x: bf16) -> f32 {
792        x.to_f32()
793    }
794}
795
796impl From<bf16> for f64 {
797    #[inline]
798    fn from(x: bf16) -> f64 {
799        x.to_f64()
800    }
801}
802
803impl From<i8> for bf16 {
804    #[inline]
805    fn from(x: i8) -> bf16 {
806        // Convert to f32, then to bf16
807        bf16::from_f32(f32::from(x))
808    }
809}
810
811impl From<u8> for bf16 {
812    #[inline]
813    fn from(x: u8) -> bf16 {
814        // Convert to f32, then to f16
815        bf16::from_f32(f32::from(x))
816    }
817}
818
819impl PartialEq for bf16 {
820    fn eq(&self, other: &bf16) -> bool {
821        if self.is_nan() || other.is_nan() {
822            false
823        } else {
824            (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0)
825        }
826    }
827}
828
829impl PartialOrd for bf16 {
830    fn partial_cmp(&self, other: &bf16) -> Option<Ordering> {
831        if self.is_nan() || other.is_nan() {
832            None
833        } else {
834            let neg = self.0 & 0x8000u16 != 0;
835            let other_neg = other.0 & 0x8000u16 != 0;
836            match (neg, other_neg) {
837                (false, false) => Some(self.0.cmp(&other.0)),
838                (false, true) => {
839                    if (self.0 | other.0) & 0x7FFFu16 == 0 {
840                        Some(Ordering::Equal)
841                    } else {
842                        Some(Ordering::Greater)
843                    }
844                }
845                (true, false) => {
846                    if (self.0 | other.0) & 0x7FFFu16 == 0 {
847                        Some(Ordering::Equal)
848                    } else {
849                        Some(Ordering::Less)
850                    }
851                }
852                (true, true) => Some(other.0.cmp(&self.0)),
853            }
854        }
855    }
856
857    fn lt(&self, other: &bf16) -> bool {
858        if self.is_nan() || other.is_nan() {
859            false
860        } else {
861            let neg = self.0 & 0x8000u16 != 0;
862            let other_neg = other.0 & 0x8000u16 != 0;
863            match (neg, other_neg) {
864                (false, false) => self.0 < other.0,
865                (false, true) => false,
866                (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0,
867                (true, true) => self.0 > other.0,
868            }
869        }
870    }
871
872    fn le(&self, other: &bf16) -> bool {
873        if self.is_nan() || other.is_nan() {
874            false
875        } else {
876            let neg = self.0 & 0x8000u16 != 0;
877            let other_neg = other.0 & 0x8000u16 != 0;
878            match (neg, other_neg) {
879                (false, false) => self.0 <= other.0,
880                (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0,
881                (true, false) => true,
882                (true, true) => self.0 >= other.0,
883            }
884        }
885    }
886
887    fn gt(&self, other: &bf16) -> bool {
888        if self.is_nan() || other.is_nan() {
889            false
890        } else {
891            let neg = self.0 & 0x8000u16 != 0;
892            let other_neg = other.0 & 0x8000u16 != 0;
893            match (neg, other_neg) {
894                (false, false) => self.0 > other.0,
895                (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0,
896                (true, false) => false,
897                (true, true) => self.0 < other.0,
898            }
899        }
900    }
901
902    fn ge(&self, other: &bf16) -> bool {
903        if self.is_nan() || other.is_nan() {
904            false
905        } else {
906            let neg = self.0 & 0x8000u16 != 0;
907            let other_neg = other.0 & 0x8000u16 != 0;
908            match (neg, other_neg) {
909                (false, false) => self.0 >= other.0,
910                (false, true) => true,
911                (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0,
912                (true, true) => self.0 <= other.0,
913            }
914        }
915    }
916}
917
918#[cfg(not(target_arch = "spirv"))]
919impl FromStr for bf16 {
920    type Err = ParseFloatError;
921    fn from_str(src: &str) -> Result<bf16, ParseFloatError> {
922        f32::from_str(src).map(bf16::from_f32)
923    }
924}
925
926#[cfg(not(target_arch = "spirv"))]
927impl Debug for bf16 {
928    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
929        Debug::fmt(&self.to_f32(), f)
930    }
931}
932
933#[cfg(not(target_arch = "spirv"))]
934impl Display for bf16 {
935    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
936        Display::fmt(&self.to_f32(), f)
937    }
938}
939
940#[cfg(not(target_arch = "spirv"))]
941impl LowerExp for bf16 {
942    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
943        write!(f, "{:e}", self.to_f32())
944    }
945}
946
947#[cfg(not(target_arch = "spirv"))]
948impl UpperExp for bf16 {
949    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
950        write!(f, "{:E}", self.to_f32())
951    }
952}
953
954#[cfg(not(target_arch = "spirv"))]
955impl Binary for bf16 {
956    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
957        write!(f, "{:b}", self.0)
958    }
959}
960
961#[cfg(not(target_arch = "spirv"))]
962impl Octal for bf16 {
963    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
964        write!(f, "{:o}", self.0)
965    }
966}
967
968#[cfg(not(target_arch = "spirv"))]
969impl LowerHex for bf16 {
970    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
971        write!(f, "{:x}", self.0)
972    }
973}
974
975#[cfg(not(target_arch = "spirv"))]
976impl UpperHex for bf16 {
977    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
978        write!(f, "{:X}", self.0)
979    }
980}
981
982impl Neg for bf16 {
983    type Output = Self;
984
985    fn neg(self) -> Self::Output {
986        Self(self.0 ^ 0x8000)
987    }
988}
989
990impl Neg for &bf16 {
991    type Output = <bf16 as Neg>::Output;
992
993    #[inline]
994    fn neg(self) -> Self::Output {
995        Neg::neg(*self)
996    }
997}
998
999impl Add for bf16 {
1000    type Output = Self;
1001
1002    fn add(self, rhs: Self) -> Self::Output {
1003        Self::from_f32(Self::to_f32(self) + Self::to_f32(rhs))
1004    }
1005}
1006
1007impl Add<&bf16> for bf16 {
1008    type Output = <bf16 as Add<bf16>>::Output;
1009
1010    #[inline]
1011    fn add(self, rhs: &bf16) -> Self::Output {
1012        self.add(*rhs)
1013    }
1014}
1015
1016impl Add<&bf16> for &bf16 {
1017    type Output = <bf16 as Add<bf16>>::Output;
1018
1019    #[inline]
1020    fn add(self, rhs: &bf16) -> Self::Output {
1021        (*self).add(*rhs)
1022    }
1023}
1024
1025impl Add<bf16> for &bf16 {
1026    type Output = <bf16 as Add<bf16>>::Output;
1027
1028    #[inline]
1029    fn add(self, rhs: bf16) -> Self::Output {
1030        (*self).add(rhs)
1031    }
1032}
1033
1034impl AddAssign for bf16 {
1035    #[inline]
1036    fn add_assign(&mut self, rhs: Self) {
1037        *self = (*self).add(rhs);
1038    }
1039}
1040
1041impl AddAssign<&bf16> for bf16 {
1042    #[inline]
1043    fn add_assign(&mut self, rhs: &bf16) {
1044        *self = (*self).add(rhs);
1045    }
1046}
1047
1048impl Sub for bf16 {
1049    type Output = Self;
1050
1051    fn sub(self, rhs: Self) -> Self::Output {
1052        Self::from_f32(Self::to_f32(self) - Self::to_f32(rhs))
1053    }
1054}
1055
1056impl Sub<&bf16> for bf16 {
1057    type Output = <bf16 as Sub<bf16>>::Output;
1058
1059    #[inline]
1060    fn sub(self, rhs: &bf16) -> Self::Output {
1061        self.sub(*rhs)
1062    }
1063}
1064
1065impl Sub<&bf16> for &bf16 {
1066    type Output = <bf16 as Sub<bf16>>::Output;
1067
1068    #[inline]
1069    fn sub(self, rhs: &bf16) -> Self::Output {
1070        (*self).sub(*rhs)
1071    }
1072}
1073
1074impl Sub<bf16> for &bf16 {
1075    type Output = <bf16 as Sub<bf16>>::Output;
1076
1077    #[inline]
1078    fn sub(self, rhs: bf16) -> Self::Output {
1079        (*self).sub(rhs)
1080    }
1081}
1082
1083impl SubAssign for bf16 {
1084    #[inline]
1085    fn sub_assign(&mut self, rhs: Self) {
1086        *self = (*self).sub(rhs);
1087    }
1088}
1089
1090impl SubAssign<&bf16> for bf16 {
1091    #[inline]
1092    fn sub_assign(&mut self, rhs: &bf16) {
1093        *self = (*self).sub(rhs);
1094    }
1095}
1096
1097impl Mul for bf16 {
1098    type Output = Self;
1099
1100    fn mul(self, rhs: Self) -> Self::Output {
1101        Self::from_f32(Self::to_f32(self) * Self::to_f32(rhs))
1102    }
1103}
1104
1105impl Mul<&bf16> for bf16 {
1106    type Output = <bf16 as Mul<bf16>>::Output;
1107
1108    #[inline]
1109    fn mul(self, rhs: &bf16) -> Self::Output {
1110        self.mul(*rhs)
1111    }
1112}
1113
1114impl Mul<&bf16> for &bf16 {
1115    type Output = <bf16 as Mul<bf16>>::Output;
1116
1117    #[inline]
1118    fn mul(self, rhs: &bf16) -> Self::Output {
1119        (*self).mul(*rhs)
1120    }
1121}
1122
1123impl Mul<bf16> for &bf16 {
1124    type Output = <bf16 as Mul<bf16>>::Output;
1125
1126    #[inline]
1127    fn mul(self, rhs: bf16) -> Self::Output {
1128        (*self).mul(rhs)
1129    }
1130}
1131
1132impl MulAssign for bf16 {
1133    #[inline]
1134    fn mul_assign(&mut self, rhs: Self) {
1135        *self = (*self).mul(rhs);
1136    }
1137}
1138
1139impl MulAssign<&bf16> for bf16 {
1140    #[inline]
1141    fn mul_assign(&mut self, rhs: &bf16) {
1142        *self = (*self).mul(rhs);
1143    }
1144}
1145
1146impl Div for bf16 {
1147    type Output = Self;
1148
1149    fn div(self, rhs: Self) -> Self::Output {
1150        Self::from_f32(Self::to_f32(self) / Self::to_f32(rhs))
1151    }
1152}
1153
1154impl Div<&bf16> for bf16 {
1155    type Output = <bf16 as Div<bf16>>::Output;
1156
1157    #[inline]
1158    fn div(self, rhs: &bf16) -> Self::Output {
1159        self.div(*rhs)
1160    }
1161}
1162
1163impl Div<&bf16> for &bf16 {
1164    type Output = <bf16 as Div<bf16>>::Output;
1165
1166    #[inline]
1167    fn div(self, rhs: &bf16) -> Self::Output {
1168        (*self).div(*rhs)
1169    }
1170}
1171
1172impl Div<bf16> for &bf16 {
1173    type Output = <bf16 as Div<bf16>>::Output;
1174
1175    #[inline]
1176    fn div(self, rhs: bf16) -> Self::Output {
1177        (*self).div(rhs)
1178    }
1179}
1180
1181impl DivAssign for bf16 {
1182    #[inline]
1183    fn div_assign(&mut self, rhs: Self) {
1184        *self = (*self).div(rhs);
1185    }
1186}
1187
1188impl DivAssign<&bf16> for bf16 {
1189    #[inline]
1190    fn div_assign(&mut self, rhs: &bf16) {
1191        *self = (*self).div(rhs);
1192    }
1193}
1194
1195impl Rem for bf16 {
1196    type Output = Self;
1197
1198    fn rem(self, rhs: Self) -> Self::Output {
1199        Self::from_f32(Self::to_f32(self) % Self::to_f32(rhs))
1200    }
1201}
1202
1203impl Rem<&bf16> for bf16 {
1204    type Output = <bf16 as Rem<bf16>>::Output;
1205
1206    #[inline]
1207    fn rem(self, rhs: &bf16) -> Self::Output {
1208        self.rem(*rhs)
1209    }
1210}
1211
1212impl Rem<&bf16> for &bf16 {
1213    type Output = <bf16 as Rem<bf16>>::Output;
1214
1215    #[inline]
1216    fn rem(self, rhs: &bf16) -> Self::Output {
1217        (*self).rem(*rhs)
1218    }
1219}
1220
1221impl Rem<bf16> for &bf16 {
1222    type Output = <bf16 as Rem<bf16>>::Output;
1223
1224    #[inline]
1225    fn rem(self, rhs: bf16) -> Self::Output {
1226        (*self).rem(rhs)
1227    }
1228}
1229
1230impl RemAssign for bf16 {
1231    #[inline]
1232    fn rem_assign(&mut self, rhs: Self) {
1233        *self = (*self).rem(rhs);
1234    }
1235}
1236
1237impl RemAssign<&bf16> for bf16 {
1238    #[inline]
1239    fn rem_assign(&mut self, rhs: &bf16) {
1240        *self = (*self).rem(rhs);
1241    }
1242}
1243
1244impl Product for bf16 {
1245    #[inline]
1246    fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
1247        bf16::from_f32(iter.map(|f| f.to_f32()).product())
1248    }
1249}
1250
1251impl<'a> Product<&'a bf16> for bf16 {
1252    #[inline]
1253    fn product<I: Iterator<Item = &'a bf16>>(iter: I) -> Self {
1254        bf16::from_f32(iter.map(|f| f.to_f32()).product())
1255    }
1256}
1257
1258impl Sum for bf16 {
1259    #[inline]
1260    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
1261        bf16::from_f32(iter.map(|f| f.to_f32()).sum())
1262    }
1263}
1264
1265impl<'a> Sum<&'a bf16> for bf16 {
1266    #[inline]
1267    fn sum<I: Iterator<Item = &'a bf16>>(iter: I) -> Self {
1268        bf16::from_f32(iter.map(|f| f.to_f32()).sum())
1269    }
1270}
1271
/// Zero-sized helper passed to the deserializer by `bf16`'s `Deserialize` implementation;
/// its `serde::de::Visitor` implementation turns the visited data into a `bf16`.
#[cfg(feature = "serde")]
struct Visitor;
1274
/// Deserializes a [`bf16`] by asking the deserializer for a newtype struct named `"bf16"`
/// and letting [`Visitor`] build the value from whatever the format supplies.
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for bf16 {
    fn deserialize<D>(deserializer: D) -> Result<bf16, D::Error>
    where
        D: serde::de::Deserializer<'de>,
    {
        let visitor = Visitor;
        serde::de::Deserializer::deserialize_newtype_struct(deserializer, "bf16", visitor)
    }
}
1284
/// Builds a [`bf16`] from the shapes a deserializer may hand over: a newtype struct
/// wrapping the raw `u16` bits, a string holding a float, or an `f32` / `f64` number.
#[cfg(feature = "serde")]
impl<'de> serde::de::Visitor<'de> for Visitor {
    type Value = bf16;

    /// Describes the expected input for serde's error messages.
    fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
        formatter.write_str("tuple struct bf16")
    }

    /// Reads the wrapped `u16` and uses it directly as the raw bit pattern.
    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let bits = <u16 as Deserialize>::deserialize(deserializer)?;
        Ok(bf16(bits))
    }

    /// Parses the string as a float; unparsable text becomes an `invalid_value` error.
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        match v.parse::<bf16>() {
            Ok(parsed) => Ok(parsed),
            Err(_) => Err(E::invalid_value(
                serde::de::Unexpected::Str(v),
                &"a float string",
            )),
        }
    }

    /// Converts an `f32` with `bf16::from_f32`.
    fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let converted = bf16::from_f32(v);
        Ok(converted)
    }

    /// Converts an `f64` with `bf16::from_f64`.
    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let converted = bf16::from_f64(v);
        Ok(converted)
    }
}
1323
#[allow(
    clippy::cognitive_complexity,
    clippy::float_cmp,
    clippy::neg_cmp_op_on_partial_ord
)]
#[cfg(test)]
mod test {
    use super::*;
    #[allow(unused_imports)]
    use core::cmp::Ordering;
    #[cfg(feature = "num-traits")]
    use num_traits::{AsPrimitive, FromPrimitive, ToPrimitive};
    use quickcheck_macros::quickcheck;

    /// Asserts every comparison operator for a pair where `hi` is strictly greater than `lo`.
    fn assert_strictly_greater(hi: bf16, lo: bf16) {
        assert_eq!(hi.partial_cmp(&lo), Some(Ordering::Greater));
        assert_eq!(lo.partial_cmp(&hi), Some(Ordering::Less));
        assert!(!(hi == lo));
        assert!(!(lo == hi));
        assert!(hi != lo);
        assert!(lo != hi);
        assert!(!(hi < lo));
        assert!(lo < hi);
        assert!(!(hi <= lo));
        assert!(lo <= hi);
        assert!(hi > lo);
        assert!(!(lo > hi));
        assert!(hi >= lo);
        assert!(!(lo >= hi));
    }

    /// `AsPrimitive` casts between `bf16` and `i32` / `f32` / `f64` in both directions.
    #[cfg(feature = "num-traits")]
    #[test]
    fn as_primitive() {
        let two = bf16::from_f32(2.0);

        let from_i32: bf16 = 2i32.as_();
        let to_i32: i32 = <bf16 as AsPrimitive<i32>>::as_(two);
        assert_eq!(from_i32, two);
        assert_eq!(to_i32, 2);

        let from_f32: bf16 = 2.0f32.as_();
        let to_f32: f32 = <bf16 as AsPrimitive<f32>>::as_(two);
        assert_eq!(from_f32, two);
        assert_eq!(to_f32, 2.0);

        let from_f64: bf16 = 2.0f64.as_();
        let to_f64: f64 = <bf16 as AsPrimitive<f64>>::as_(two);
        assert_eq!(from_f64, two);
        assert_eq!(to_f64, 2.0);
    }

    /// `ToPrimitive` conversions out of `bf16`.
    #[cfg(feature = "num-traits")]
    #[test]
    fn to_primitive() {
        let two = bf16::from_f32(2.0);
        assert_eq!(ToPrimitive::to_i32(&two), Some(2i32));
        assert_eq!(ToPrimitive::to_f32(&two), Some(2.0f32));
        assert_eq!(ToPrimitive::to_f64(&two), Some(2.0f64));
    }

    /// `FromPrimitive` conversions into `bf16`.
    #[cfg(feature = "num-traits")]
    #[test]
    fn from_primitive() {
        let two = bf16::from_f32(2.0);
        assert_eq!(<bf16 as FromPrimitive>::from_i32(2), Some(two));
        assert_eq!(<bf16 as FromPrimitive>::from_f32(2.0), Some(two));
        assert_eq!(<bf16 as FromPrimitive>::from_f64(2.0), Some(two));
    }

    /// The associated constants agree with converting the corresponding `f32` values.
    #[test]
    fn test_bf16_consts_from_f32() {
        use core::f32::consts as wide;

        let zero = bf16::from_f32(0.0);
        let neg_zero = bf16::from_f32(-0.0);
        let neg_one = bf16::from_f32(-1.0);
        let nan = bf16::from_f32(core::f32::NAN);

        assert_eq!(bf16::ONE, bf16::from_f32(1.0));
        assert_eq!(bf16::ZERO, zero);
        assert!(zero.is_sign_positive());
        assert_eq!(bf16::NEG_ZERO, neg_zero);
        assert!(neg_zero.is_sign_negative());
        assert_eq!(bf16::NEG_ONE, neg_one);
        assert!(neg_one.is_sign_negative());
        assert_eq!(bf16::INFINITY, bf16::from_f32(core::f32::INFINITY));
        assert_eq!(
            bf16::NEG_INFINITY,
            bf16::from_f32(core::f32::NEG_INFINITY)
        );
        assert!(nan.is_nan());
        assert!(bf16::NAN.is_nan());

        let math_constants = [
            (bf16::E, wide::E),
            (bf16::PI, wide::PI),
            (bf16::FRAC_1_PI, wide::FRAC_1_PI),
            (bf16::FRAC_1_SQRT_2, wide::FRAC_1_SQRT_2),
            (bf16::FRAC_2_PI, wide::FRAC_2_PI),
            (bf16::FRAC_2_SQRT_PI, wide::FRAC_2_SQRT_PI),
            (bf16::FRAC_PI_2, wide::FRAC_PI_2),
            (bf16::FRAC_PI_3, wide::FRAC_PI_3),
            (bf16::FRAC_PI_4, wide::FRAC_PI_4),
            (bf16::FRAC_PI_6, wide::FRAC_PI_6),
            (bf16::FRAC_PI_8, wide::FRAC_PI_8),
            (bf16::LN_10, wide::LN_10),
            (bf16::LN_2, wide::LN_2),
            (bf16::LOG10_E, wide::LOG10_E),
            // core::f32::consts::LOG10_2 requires rustc 1.43.0
            (bf16::LOG10_2, 2f32.log10()),
            (bf16::LOG2_E, wide::LOG2_E),
            // core::f32::consts::LOG2_10 requires rustc 1.43.0
            (bf16::LOG2_10, 10f32.log2()),
            (bf16::SQRT_2, wide::SQRT_2),
        ];
        for &(constant, source) in math_constants.iter() {
            assert_eq!(constant, bf16::from_f32(source));
        }
    }

    /// The associated constants agree with converting the corresponding `f64` values.
    #[test]
    fn test_bf16_consts_from_f64() {
        use core::f64::consts as wide;

        let nan = bf16::from_f64(core::f64::NAN);

        assert_eq!(bf16::ONE, bf16::from_f64(1.0));
        assert_eq!(bf16::ZERO, bf16::from_f64(0.0));
        assert_eq!(bf16::NEG_ZERO, bf16::from_f64(-0.0));
        assert_eq!(bf16::INFINITY, bf16::from_f64(core::f64::INFINITY));
        assert_eq!(
            bf16::NEG_INFINITY,
            bf16::from_f64(core::f64::NEG_INFINITY)
        );
        assert!(nan.is_nan());
        assert!(bf16::NAN.is_nan());

        let math_constants = [
            (bf16::E, wide::E),
            (bf16::PI, wide::PI),
            (bf16::FRAC_1_PI, wide::FRAC_1_PI),
            (bf16::FRAC_1_SQRT_2, wide::FRAC_1_SQRT_2),
            (bf16::FRAC_2_PI, wide::FRAC_2_PI),
            (bf16::FRAC_2_SQRT_PI, wide::FRAC_2_SQRT_PI),
            (bf16::FRAC_PI_2, wide::FRAC_PI_2),
            (bf16::FRAC_PI_3, wide::FRAC_PI_3),
            (bf16::FRAC_PI_4, wide::FRAC_PI_4),
            (bf16::FRAC_PI_6, wide::FRAC_PI_6),
            (bf16::FRAC_PI_8, wide::FRAC_PI_8),
            (bf16::LN_10, wide::LN_10),
            (bf16::LN_2, wide::LN_2),
            (bf16::LOG10_E, wide::LOG10_E),
            // core::f64::consts::LOG10_2 requires rustc 1.43.0
            (bf16::LOG10_2, 2f64.log10()),
            (bf16::LOG2_E, wide::LOG2_E),
            // core::f64::consts::LOG2_10 requires rustc 1.43.0
            (bf16::LOG2_10, 10f64.log2()),
            (bf16::SQRT_2, wide::SQRT_2),
        ];
        for &(constant, source) in math_constants.iter() {
            assert_eq!(constant, bf16::from_f64(source));
        }
    }

    /// Narrowing a NaN (f64 -> f32, f64 -> bf16, f32 -> bf16) must still yield a NaN.
    #[test]
    fn test_nan_conversion_to_smaller() {
        let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64);
        let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64);
        let nan32 = f32::from_bits(0x7F80_0001u32);
        let neg_nan32 = f32::from_bits(0xFF80_0001u32);

        // The hand-built inputs must be NaNs carrying the intended sign.
        assert!(nan64.is_nan() && nan64.is_sign_positive());
        assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative());
        assert!(nan32.is_nan() && nan32.is_sign_positive());
        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());

        // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
        // so only NaN-ness of the results is checked.
        assert!((neg_nan64 as f32).is_nan());
        assert!((nan64 as f32).is_nan());
        for &source in [nan64, neg_nan64].iter() {
            assert!(bf16::from_f64(source).is_nan());
        }
        for &source in [nan32, neg_nan32].iter() {
            assert!(bf16::from_f32(source).is_nan());
        }
    }

    /// Widening a NaN (bf16 -> f32, bf16 -> f64, f32 -> f64) must still yield a NaN.
    #[test]
    fn test_nan_conversion_to_larger() {
        let nan16 = bf16::from_bits(0x7F81u16);
        let neg_nan16 = bf16::from_bits(0xFF81u16);
        let nan32 = f32::from_bits(0x7F80_0001u32);
        let neg_nan32 = f32::from_bits(0xFF80_0001u32);

        // The hand-built inputs must be NaNs carrying the intended sign.
        assert!(nan16.is_nan() && nan16.is_sign_positive());
        assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative());
        assert!(nan32.is_nan() && nan32.is_sign_positive());
        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());

        // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
        // so only NaN-ness of the results is checked.
        for &source in [nan16, neg_nan16].iter() {
            assert!(f32::from(source).is_nan());
        }
        for &source in [nan16, neg_nan16].iter() {
            assert!(f64::from(source).is_nan());
        }
        for &source in [nan32, neg_nan32].iter() {
            assert!(f64::from(source).is_nan());
        }
    }

    /// Round trips through `f32`, including the smallest subnormal bit patterns.
    #[test]
    fn test_bf16_to_f32() {
        let seven = bf16::from_f32(7.0);
        assert_eq!(seven.to_f32(), 7.0f32);

        // 7.1 is NOT exactly representable in 16-bit, it's rounded
        let approx = bf16::from_f32(7.1);
        let error = (approx.to_f32() - 7.1f32).abs();
        // error must be <= 4 * EPSILON, as 7 has two more significant bits than 1
        assert!(error <= 4.0 * bf16::EPSILON.to_f32());

        let tiny32 = f32::from_bits(0x0001_0000u32);
        let subnormals = [(0x0001u16, tiny32), (0x0005u16, 5.0 * tiny32)];
        for &(bits, wide) in subnormals.iter() {
            assert_eq!(bf16::from_bits(bits).to_f32(), wide);
        }
        for &(bits, wide) in subnormals.iter() {
            assert_eq!(bf16::from_bits(bits), bf16::from_f32(wide));
        }
    }

    /// Round trips through `f64`, including the smallest subnormal bit patterns.
    #[test]
    fn test_bf16_to_f64() {
        let seven = bf16::from_f64(7.0);
        assert_eq!(seven.to_f64(), 7.0f64);

        // 7.1 is NOT exactly representable in 16-bit, it's rounded
        let approx = bf16::from_f64(7.1);
        let error = (approx.to_f64() - 7.1f64).abs();
        // error must be <= 4 * EPSILON, as 7 has two more significant bits than 1
        assert!(error <= 4.0 * bf16::EPSILON.to_f64());

        let tiny64 = 2.0f64.powi(-133);
        let subnormals = [(0x0001u16, tiny64), (0x0005u16, 5.0 * tiny64)];
        for &(bits, wide) in subnormals.iter() {
            assert_eq!(bf16::from_bits(bits).to_f64(), wide);
        }
        for &(bits, wide) in subnormals.iter() {
            assert_eq!(bf16::from_bits(bits), bf16::from_f64(wide));
        }
    }

    /// Ordering and equality, with special attention to signed zeros.
    #[test]
    fn test_comparisons() {
        let zero = bf16::from_f64(0.0);
        let one = bf16::from_f64(1.0);
        let neg_zero = bf16::from_f64(-0.0);
        let neg_one = bf16::from_f64(-1.0);

        // +0.0 and -0.0 compare equal in every operator.
        assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal));
        assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal));
        assert!(zero == neg_zero);
        assert!(neg_zero == zero);
        assert!(!(zero != neg_zero));
        assert!(!(neg_zero != zero));
        assert!(!(zero < neg_zero));
        assert!(!(neg_zero < zero));
        assert!(zero <= neg_zero);
        assert!(neg_zero <= zero);
        assert!(!(zero > neg_zero));
        assert!(!(neg_zero > zero));
        assert!(zero >= neg_zero);
        assert!(neg_zero >= zero);

        assert_strictly_greater(one, neg_zero);
        assert_strictly_greater(one, neg_one);
    }

    /// `from_f32` rounds to nearest, ties to even, for subnormal and normal inputs.
    #[test]
    fn round_to_even_f32() {
        // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133
        let min_sub = bf16::from_bits(1);
        let min_sub_f = (-133f32).exp2();
        assert_eq!(bf16::from_f32(min_sub_f).to_bits(), min_sub.to_bits());
        assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits());

        // (multiple of the smallest subnormal, expected number of subnormal steps)
        let subnormal_cases: [(f32, u16); 9] = [
            // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding)
            (0.49, 0),
            // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even)
            (0.50, 0),
            // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up)
            (0.51, 1),
            // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding)
            (1.49, 1),
            // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even)
            (1.50, 2),
            // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up)
            (1.51, 2),
            // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding)
            (2.49, 2),
            // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even)
            (2.50, 2),
            // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up)
            (2.51, 3),
        ];
        for &(scale, steps) in subnormal_cases.iter() {
            assert_eq!(
                bf16::from_f32(min_sub_f * scale).to_bits(),
                min_sub.to_bits() * steps
            );
        }

        // (input, exactly representable value it must round to)
        let normal_cases: [(f32, f32); 9] = [
            (250.49, 250.0),
            (250.50, 250.0),
            (250.51, 251.0),
            (251.49, 251.0),
            (251.50, 252.0),
            (251.51, 252.0),
            (252.49, 252.0),
            (252.50, 252.0),
            (252.51, 253.0),
        ];
        for &(input, rounded) in normal_cases.iter() {
            assert_eq!(
                bf16::from_f32(input).to_bits(),
                bf16::from_f32(rounded).to_bits()
            );
        }
    }

    /// `from_f64` rounds to nearest, ties to even, for subnormal and normal inputs.
    #[test]
    fn round_to_even_f64() {
        // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133
        let min_sub = bf16::from_bits(1);
        let min_sub_f = (-133f64).exp2();
        assert_eq!(bf16::from_f64(min_sub_f).to_bits(), min_sub.to_bits());
        assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits());

        // (multiple of the smallest subnormal, expected number of subnormal steps)
        let subnormal_cases: [(f64, u16); 9] = [
            // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding)
            (0.49, 0),
            // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even)
            (0.50, 0),
            // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up)
            (0.51, 1),
            // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding)
            (1.49, 1),
            // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even)
            (1.50, 2),
            // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up)
            (1.51, 2),
            // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding)
            (2.49, 2),
            // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even)
            (2.50, 2),
            // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up)
            (2.51, 3),
        ];
        for &(scale, steps) in subnormal_cases.iter() {
            assert_eq!(
                bf16::from_f64(min_sub_f * scale).to_bits(),
                min_sub.to_bits() * steps
            );
        }

        // (input, exactly representable value it must round to)
        let normal_cases: [(f64, f64); 9] = [
            (250.49, 250.0),
            (250.50, 250.0),
            (250.51, 251.0),
            (251.49, 251.0),
            (251.50, 252.0),
            (251.51, 252.0),
            (252.49, 252.0),
            (252.50, 252.0),
            (252.51, 253.0),
        ];
        for &(input, rounded) in normal_cases.iter() {
            assert_eq!(
                bf16::from_f64(input).to_bits(),
                bf16::from_f64(rounded).to_bits()
            );
        }
    }

    /// `Display` and `Debug` honor precision, sign and width flags.
    #[cfg(feature = "std")]
    #[test]
    fn formatting() {
        let value = bf16::from_f32(0.1152344);

        // Display
        assert_eq!(format!("{:.3}", value), "0.115");
        assert_eq!(format!("{:.4}", value), "0.1152");
        assert_eq!(format!("{:+.4}", value), "+0.1152");
        assert_eq!(format!("{:>+10.4}", value), "   +0.1152");

        // Debug
        assert_eq!(format!("{:.3?}", value), "0.115");
        assert_eq!(format!("{:.4?}", value), "0.1152");
        assert_eq!(format!("{:+.4?}", value), "+0.1152");
        assert_eq!(format!("{:>+10.4?}", value), "   +0.1152");
    }

    /// Every 16-bit pattern is a valid `bf16`, so arbitrary values come from arbitrary bits.
    impl quickcheck::Arbitrary for bf16 {
        fn arbitrary(g: &mut quickcheck::Gen) -> Self {
            let bits = u16::arbitrary(g);
            bf16(bits)
        }
    }

    /// bf16 -> f32 -> bf16 preserves the bits (for NaN: NaN-ness and sign).
    #[quickcheck]
    fn qc_roundtrip_bf16_f32_is_identity(f: bf16) -> bool {
        let roundtrip = bf16::from_f32(f.to_f32());
        if !f.is_nan() {
            return f.0 == roundtrip.0;
        }
        roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
    }

    /// bf16 -> f64 -> bf16 preserves the bits (for NaN: NaN-ness and sign).
    #[quickcheck]
    fn qc_roundtrip_bf16_f64_is_identity(f: bf16) -> bool {
        let roundtrip = bf16::from_f64(f.to_f64());
        if !f.is_nan() {
            return f.0 == roundtrip.0;
        }
        roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
    }
}
half/bfloat.rs

half/
bfloat.rs