From 76032aceecf3f1279a0744119fede2070cdda69f Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Sat, 20 Apr 2024 04:01:03 +0200 Subject: [PATCH] perf: use borrowing/carrying ops in add/sub, remove bound checks in shifts --- CHANGELOG.md | 7 ++++++- src/add.rs | 35 ++++++++++++++++++++++------------- src/algorithms/div/knuth.rs | 1 - src/bits.rs | 19 +++++++------------ src/macros.rs | 24 ++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e96b133..868144d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,11 +9,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Use borrowing/carrying ops in add/sub, remove bound checks in shifts ([#366]) + ### Fixed -- add `alloc` requirement to `num-traits` feature [#363] +- Add `alloc` requirement to `num-traits` feature [#363] [#363]: https://github.com/recmo/uint/pull/363 +[#366]: https://github.com/recmo/uint/pull/366 ## [1.12.1] - 2024-03-12 diff --git a/src/add.rs b/src/add.rs index ebd56ffd..c1fa5f32 100644 --- a/src/add.rs +++ b/src/add.rs @@ -56,19 +56,24 @@ impl Uint { #[inline] #[must_use] pub const fn overflowing_add(mut self, rhs: Self) -> (Self, bool) { + // TODO: Replace with `u64::carrying_add` once stable. + #[inline] + const fn u64_carrying_add(lhs: u64, rhs: u64, carry: bool) -> (u64, bool) { + let (a, b) = lhs.overflowing_add(rhs); + let (c, d) = a.overflowing_add(carry as u64); + (c, b || d) + } + if BITS == 0 { return (Self::ZERO, false); } - let mut carry = 0_u128; + let mut carry = false; let mut i = 0; - #[allow(clippy::cast_possible_truncation)] // Intentional while i < LIMBS { - carry += self.limbs[i] as u128 + rhs.limbs[i] as u128; - self.limbs[i] = carry as u64; - carry >>= 64; + (self.limbs[i], carry) = u64_carrying_add(self.limbs[i], rhs.limbs[i], carry); i += 1; } - let overflow = carry != 0 || self.limbs[LIMBS - 1] > Self::MASK; + let overflow = carry || self.limbs[LIMBS - 1] > Self::MASK; self.limbs[LIMBS - 1] &= Self::MASK; (self, overflow) } @@ -93,20 +98,24 @@ impl Uint { #[inline] #[must_use] pub const fn overflowing_sub(mut self, rhs: Self) -> (Self, bool) { + // TODO: Replace with `u64::borrowing_sub` once stable. + #[inline] + const fn u64_borrowing_sub(lhs: u64, rhs: u64, borrow: bool) -> (u64, bool) { + let (a, b) = lhs.overflowing_sub(rhs); + let (c, d) = a.overflowing_sub(borrow as u64); + (c, b || d) + } + if BITS == 0 { return (Self::ZERO, false); } - let mut carry = 0_i128; + let mut borrow = false; let mut i = 0; - #[allow(clippy::cast_possible_truncation)] // Intentional - #[allow(clippy::cast_sign_loss)] // Intentional while i < LIMBS { - carry += self.limbs[i] as i128 - rhs.limbs[i] as i128; - self.limbs[i] = carry as u64; - carry >>= 64; + (self.limbs[i], borrow) = u64_borrowing_sub(self.limbs[i], rhs.limbs[i], borrow); i += 1; } - let overflow = carry != 0 || self.limbs[LIMBS - 1] > Self::MASK; + let overflow = borrow || self.limbs[LIMBS - 1] > Self::MASK; self.limbs[LIMBS - 1] &= Self::MASK; (self, overflow) } diff --git a/src/algorithms/div/knuth.rs b/src/algorithms/div/knuth.rs index 7ef4b9b2..d9d0d9b8 100644 --- a/src/algorithms/div/knuth.rs +++ b/src/algorithms/div/knuth.rs @@ -5,7 +5,6 @@ use crate::{ algorithms::{add::adc_n, mul::submul_nx1}, utils::{likely, unlikely}, }; -use core::u64; /// ⚠️ In-place Knuth normalized long division with reciprocals. /// diff --git a/src/bits.rs b/src/bits.rs index dd2d3c96..fdf12035 100644 --- a/src/bits.rs +++ b/src/bits.rs @@ -271,11 +271,10 @@ impl Uint { // Shift for i in (limbs..LIMBS).rev() { + assume!(i >= limbs && i - limbs < LIMBS); self.limbs[i] = self.limbs[i - limbs]; } - for i in 0..limbs { - self.limbs[i] = 0; - } + self.limbs[..limbs].fill(0); self.limbs[LIMBS - 1] &= Self::MASK; return (self, overflow); } @@ -294,13 +293,12 @@ impl Uint { // Shift for i in (limbs + 1..LIMBS).rev() { + assume!(i >= limbs + 1 && i - limbs < LIMBS && i - limbs - 1 < LIMBS); self.limbs[i] = self.limbs[i - limbs] << bits; self.limbs[i] |= self.limbs[i - limbs - 1] >> (64 - bits); } self.limbs[limbs] = self.limbs[0] << bits; - for i in 0..limbs { - self.limbs[i] = 0; - } + self.limbs[..limbs].fill(0); self.limbs[LIMBS - 1] &= Self::MASK; (self, overflow) } @@ -367,9 +365,7 @@ impl Uint { for i in 0..(LIMBS - limbs) { self.limbs[i] = self.limbs[i + limbs]; } - for i in (LIMBS - limbs)..LIMBS { - self.limbs[i] = 0; - } + self.limbs[LIMBS - limbs..].fill(0); return (self, overflow); } @@ -378,13 +374,12 @@ impl Uint { // Shift for i in 0..(LIMBS - limbs - 1) { + assume!(i + limbs < LIMBS && i + limbs + 1 < LIMBS); self.limbs[i] = self.limbs[i + limbs] >> bits; self.limbs[i] |= self.limbs[i + limbs + 1] << (64 - bits); } self.limbs[LIMBS - limbs - 1] = self.limbs[LIMBS - 1] >> bits; - for i in (LIMBS - limbs)..LIMBS { - self.limbs[i] = 0; - } + self.limbs[LIMBS - limbs..].fill(0); (self, overflow) } diff --git a/src/macros.rs b/src/macros.rs index c2f9ed2a..8c38dafa 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -75,6 +75,30 @@ macro_rules! impl_bin_op { }; } +macro_rules! assume { + ($e:expr $(,)?) => { + if !$e { + debug_unreachable!(stringify!($e)); + } + }; + + ($e:expr, $($t:tt)+) => { + if !$e { + debug_unreachable!($($t)+); + } + }; +} + +macro_rules! debug_unreachable { + ($($t:tt)*) => { + if cfg!(debug_assertions) { + unreachable!($($t)*); + } else { + unsafe { core::hint::unreachable_unchecked() }; + } + }; +} + #[cfg(test)] mod tests { // https://github.com/recmo/uint/issues/359