From d0f31a14a26adb196f4e8561f1b10f8f463f9a6e Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Mon, 27 May 2024 18:32:40 +0200 Subject: [PATCH] perf: non-allocating `mul_mod` --- src/base_convert.rs | 6 ++---- src/modular.rs | 23 +++++++++++++---------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/base_convert.rs b/src/base_convert.rs index d3c386f2..4a981413 100644 --- a/src/base_convert.rs +++ b/src/base_convert.rs @@ -95,8 +95,7 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> { /// /// * [`BaseConvertError::InvalidBase`] if the base is less than 2. /// * [`BaseConvertError::InvalidDigit`] if a digit is out of range. - /// * [`BaseConvertError::Overflow`] if the number is too large to - /// fit. + /// * [`BaseConvertError::Overflow`] if the number is too large to fit. #[inline] pub fn from_base_le<I>(base: u64, digits: I) -> Result<Self, BaseConvertError> where @@ -155,8 +154,7 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> { /// /// * [`BaseConvertError::InvalidBase`] if the base is less than 2. /// * [`BaseConvertError::InvalidDigit`] if a digit is out of range. - /// * [`BaseConvertError::Overflow`] if the number is too large to - /// fit. + /// * [`BaseConvertError::Overflow`] if the number is too large to fit. #[inline] pub fn from_base_be<I: IntoIterator<Item = u64>>( base: u64, diff --git a/src/modular.rs b/src/modular.rs index e3dd2dbd..3970b520 100644 --- a/src/modular.rs +++ b/src/modular.rs @@ -52,24 +52,28 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> { /// some pre-computation. #[inline] #[must_use] - #[cfg(feature = "alloc")] // see comments below pub fn mul_mod(self, rhs: Self, mut modulus: Self) -> Self { if modulus == Self::ZERO { return Self::ZERO; } + + // Allocate at least `nlimbs(2 * BITS)` limbs to store the product. This array + // casting is a workaround for `generic_const_exprs` not being stable. + let mut product = [[0u64; 2]; LIMBS]; + let product_len = crate::nlimbs(2 * BITS); + debug_assert!(2 * LIMBS >= product_len); + // SAFETY: `[[u64; 2]; LIMBS] == [u64; 2 * LIMBS] >= [u64; nlimbs(2 * BITS)]`.
+ let product = unsafe { + core::slice::from_raw_parts_mut(product.as_mut_ptr().cast::<u64>(), product_len) + }; + // Compute full product. - // The challenge here is that Rust doesn't allow us to create a - // `Uint<2 * BITS, _>` for the intermediate result. Otherwise - // we could just use a `widening_mul`. So instead we allocate from heap. - // Alternatively we could use `alloca`, but that is blocked on - // See - let mut product = vec![0; crate::nlimbs(2 * BITS)]; - let overflow = algorithms::addmul(&mut product, self.as_limbs(), rhs.as_limbs()); + let overflow = algorithms::addmul(product, self.as_limbs(), rhs.as_limbs()); debug_assert!(!overflow); // Compute modulus using `div_rem`. // This stores the remainder in the divisor, `modulus`. - algorithms::div(&mut product, &mut modulus.limbs); + algorithms::div(product, &mut modulus.limbs); modulus } @@ -79,7 +83,6 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> { /// Returns zero if the modulus is zero. #[inline] #[must_use] - #[cfg(feature = "alloc")] // see comments in mul_mod pub fn pow_mod(mut self, mut exp: Self, modulus: Self) -> Self { if modulus == Self::ZERO || modulus <= Self::from(1) { // Also covers Self::BITS == 0