Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Float trait: shorten prefixes, rename EXP_MAX to EXP_SAT #738

Merged
merged 3 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ fn run() {
fn something_with_a_dtor(f: &dyn Fn()) {
struct A<'a>(&'a (dyn Fn() + 'a));

impl<'a> Drop for A<'a> {
impl Drop for A<'_> {
fn drop(&mut self) {
(self.0)();
}
Expand Down
16 changes: 8 additions & 8 deletions src/float/add.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ where
let zero = F::Int::ZERO;

let bits = F::BITS.cast();
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let significand_bits = F::SIG_BITS;
let max_exponent = F::EXP_SAT;

let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let significand_mask = F::SIG_MASK;
let sign_bit = F::SIGN_MASK as F::Int;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let exponent_mask = F::EXP_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
Expand Down Expand Up @@ -143,9 +143,9 @@ where

// If the addition carried up, we need to right-shift the result and
// adjust the exponent:
if a_significand & implicit_bit << 4 != MinInt::ZERO {
if a_significand & (implicit_bit << 4) != MinInt::ZERO {
let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO);
a_significand = a_significand >> 1 | sticky;
a_significand = (a_significand >> 1) | sticky;
a_exponent += 1;
}
}
Expand All @@ -161,7 +161,7 @@ where
let shift = (1 - a_exponent).cast();
let sticky =
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO);
a_significand = a_significand >> shift.cast() | sticky;
a_significand = (a_significand >> shift.cast()) | sticky;
a_exponent = 0;
}

Expand All @@ -170,7 +170,7 @@ where
let round_guard_sticky: i32 = a_significand_i32 & 0x7;

// Shift the significand into place, and mask off the implicit bit.
let mut result = a_significand >> 3 & significand_mask;
let mut result = (a_significand >> 3) & significand_mask;

// Insert the exponent and sign.
result |= a_exponent.cast() << significand_bits;
Expand Down
4 changes: 2 additions & 2 deletions src/float/cmp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ fn cmp<F: Float>(a: F, b: F) -> Result {

let sign_bit = F::SIGN_MASK as F::Int;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let exponent_mask = F::EXP_MASK;
let inf_rep = exponent_mask;

let a_rep = a.to_bits();
Expand Down Expand Up @@ -87,7 +87,7 @@ fn unord<F: Float>(a: F, b: F) -> bool {

let sign_bit = F::SIGN_MASK as F::Int;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let exponent_mask = F::EXP_MASK;
let inf_rep = exponent_mask;

let a_rep = a.to_bits();
Expand Down
50 changes: 25 additions & 25 deletions src/float/conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ mod int_to_float {
/// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
/// bit set can be added back later.
fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n)
F::Int::cast_from(F::EXP_BIAS - 1 + I::BITS - n)
}

/// Adjust a mantissa with dropped bits to perform correct rounding.
Expand All @@ -42,7 +42,7 @@ mod int_to_float {
fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
// Branchlessly extract a `1` if rounding up should happen, 0 otherwise
// This accounts for rounding to even.
let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1);
let adj = (dropped_bits - ((dropped_bits >> (F::BITS - 1)) & !m_base)) >> (F::BITS - 1);

// Add one when we need to round up. Break ties to even.
m_base + adj
Expand All @@ -54,17 +54,17 @@ mod int_to_float {
/// value to cancel it out.
fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
// + rather than | so the mantissa can overflow into the exponent
(e << F::SIGNIFICAND_BITS) + m
(e << F::SIG_BITS) + m
}

/// Shift distance from a left-aligned integer to a smaller float.
fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
(I::BITS - F::BITS) + F::EXPONENT_BITS
(I::BITS - F::BITS) + F::EXP_BITS
}

/// Shift distance from an integer with `n` leading zeros to a smaller float.
fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
F::SIGNIFICAND_BITS - I::BITS + 1 + n
F::SIG_BITS - I::BITS + 1 + n
}

/// Perform a signed operation as unsigned, then add the sign back.
Expand All @@ -85,9 +85,9 @@ mod int_to_float {
}
let n = i.leading_zeros();
// Mantissa with implicit bit set (significant bits)
let m_base = (i << n) >> f32::EXPONENT_BITS;
let m_base = (i << n) >> f32::EXP_BITS;
// Bits that will be dropped (insignificant bits)
let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1);
let adj = (i << n) << (f32::SIG_BITS + 1);
let m = m_adj::<f32>(m_base, adj);
let e = exp::<u32, f32>(n) - 1;
repr::<f32>(e, m)
Expand Down Expand Up @@ -116,7 +116,7 @@ mod int_to_float {
let m = (i as u64) << (shift_f_gt_i::<u32, f128>(n) - 64);
let e = exp::<u32, f128>(n) as u64 - 1;
// High 64 bits of f128 representation.
let h = (e << (f128::SIGNIFICAND_BITS - 64)) + m;
let h = (e << (f128::SIG_BITS - 64)) + m;

// Shift back to the high bits, the rest of the mantissa will always be 0.
(h as u128) << 64
Expand All @@ -128,8 +128,8 @@ mod int_to_float {
// Mantissa with implicit bit set
let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
// The entire lower half of `i` will be truncated (masked portion), plus the
// next `EXPONENT_BITS` bits.
let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
// next `EXP_BITS` bits.
let adj = ((i_m >> f32::EXP_BITS) | i_m & 0xFFFF) as u32;
let m = m_adj::<f32>(m_base, adj);
let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
repr::<f32>(e, m)
Expand All @@ -141,8 +141,8 @@ mod int_to_float {
}
let n = i.leading_zeros();
// Mantissa with implicit bit set
let m_base = (i << n) >> f64::EXPONENT_BITS;
let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1);
let m_base = (i << n) >> f64::EXP_BITS;
let adj = (i << n) << (f64::SIG_BITS + 1);
let m = m_adj::<f64>(m_base, adj);
let e = exp::<u64, f64>(n) - 1;
repr::<f64>(e, m)
Expand All @@ -167,7 +167,7 @@ mod int_to_float {

// Within the upper `F::BITS`, everything except for the significand
// gets truncated
let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast();
let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIG_BITS - 1)).cast();

// The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
// check if it is nonzero.
Expand All @@ -186,8 +186,8 @@ mod int_to_float {
// Mantissa with implicit bit set
let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
// The entire lower half of `i` will be truncated (masked portion), plus the
// next `EXPONENT_BITS` bits.
let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
// next `EXP_BITS` bits.
let adj = ((i_m >> f64::EXP_BITS) | i_m & 0xFFFF_FFFF) as u64;
let m = m_adj::<f64>(m_base, adj);
let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
repr::<f64>(e, m)
Expand All @@ -200,8 +200,8 @@ mod int_to_float {
}
let n = i.leading_zeros();
// Mantissa with implicit bit set
let m_base = (i << n) >> f128::EXPONENT_BITS;
let adj = (i << n) << (f128::SIGNIFICAND_BITS + 1);
let m_base = (i << n) >> f128::EXP_BITS;
let adj = (i << n) << (f128::SIG_BITS + 1);
let m = m_adj::<f128>(m_base, adj);
let e = exp::<u128, f128>(n) - 1;
repr::<f128>(e, m)
Expand Down Expand Up @@ -362,29 +362,29 @@ where
F::Int: CastFrom<u32>,
u32: CastFrom<F::Int>,
{
let int_max_exp = F::EXPONENT_BIAS + I::MAX.ilog2() + 1;
let foobar = F::EXPONENT_BIAS + I::UnsignedInt::BITS - 1;
let int_max_exp = F::EXP_BIAS + I::MAX.ilog2() + 1;
let foobar = F::EXP_BIAS + I::UnsignedInt::BITS - 1;

if fbits < F::ONE.to_bits() {
// < 0 gets rounded to 0
I::ZERO
} else if fbits < F::Int::cast_from(int_max_exp) << F::SIGNIFICAND_BITS {
} else if fbits < F::Int::cast_from(int_max_exp) << F::SIG_BITS {
// >= 1, < integer max
let m_base = if I::UnsignedInt::BITS >= F::Int::BITS {
I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIGNIFICAND_BITS - 1)
I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1)
} else {
I::UnsignedInt::cast_from(fbits >> (F::SIGNIFICAND_BITS - I::BITS + 1))
I::UnsignedInt::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1))
};

// Set the implicit 1-bit.
let m: I::UnsignedInt = I::UnsignedInt::ONE << (I::BITS - 1) | m_base;
let m: I::UnsignedInt = (I::UnsignedInt::ONE << (I::BITS - 1)) | m_base;

// Shift based on the exponent and bias.
let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIGNIFICAND_BITS);
let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIG_BITS);

let unsigned = m >> s;
map_inbounds(I::from_unsigned(unsigned))
} else if fbits <= F::EXPONENT_MASK {
} else if fbits <= F::EXP_MASK {
// >= max (incl. inf)
out_of_bounds()
} else {
Expand Down
12 changes: 6 additions & 6 deletions src/float/div.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,16 @@ where
let hw = F::BITS / 2;
let lo_mask = F::Int::MAX >> hw;

let significand_bits = F::SIGNIFICAND_BITS;
let significand_bits = F::SIG_BITS;
// Saturated exponent, representing infinity
let exponent_sat: F::Int = F::EXPONENT_MAX.cast();
let exponent_sat: F::Int = F::EXP_SAT.cast();

let exponent_bias = F::EXPONENT_BIAS;
let exponent_bias = F::EXP_BIAS;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let significand_mask = F::SIG_MASK;
let sign_bit = F::SIGN_MASK;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let exponent_mask = F::EXP_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
Expand Down Expand Up @@ -261,7 +261,7 @@ where
let c_hw = c_hw::<F>();

// Check that the top bit is set, i.e. value is within `[1, 2)`.
debug_assert!(b_uq1_hw & one_hw << (HalfRep::<F>::BITS - 1) > zero_hw);
debug_assert!(b_uq1_hw & (one_hw << (HalfRep::<F>::BITS - 1)) > zero_hw);

// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
// so x0 fits to UQ0.HW without wrapping.
Expand Down
14 changes: 7 additions & 7 deletions src/float/extend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,19 @@ where
let src_zero = F::Int::ZERO;
let src_one = F::Int::ONE;
let src_bits = F::BITS;
let src_sign_bits = F::SIGNIFICAND_BITS;
let src_exp_bias = F::EXPONENT_BIAS;
let src_sign_bits = F::SIG_BITS;
let src_exp_bias = F::EXP_BIAS;
let src_min_normal = F::IMPLICIT_BIT;
let src_infinity = F::EXPONENT_MASK;
let src_infinity = F::EXP_MASK;
let src_sign_mask = F::SIGN_MASK as F::Int;
let src_abs_mask = src_sign_mask - src_one;
let src_qnan = F::SIGNIFICAND_MASK;
let src_qnan = F::SIG_MASK;
let src_nan_code = src_qnan - src_one;

let dst_bits = R::BITS;
let dst_sign_bits = R::SIGNIFICAND_BITS;
let dst_inf_exp = R::EXPONENT_MAX;
let dst_exp_bias = R::EXPONENT_BIAS;
let dst_sign_bits = R::SIG_BITS;
let dst_inf_exp = R::EXP_SAT;
let dst_exp_bias = R::EXP_BIAS;
let dst_min_normal = R::IMPLICIT_BIT;

let sign_bits_delta = dst_sign_bits - src_sign_bits;
Expand Down
Loading
Loading