Alexhuszagh · Alexhuszagh · Jan 11, 2025 · Jan 11, 2025 · Jan 11, 2025
diff --git a/CHANGELOG b/CHANGELOG
@@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 
 - Bug where the `radix` feature wasn't enabling `power-of-two` in `lexical-core` or `lexical`.
+- Performance issues due to a lack of inlining in the Eisel-Lemire algorithm (#210).
 
 ## [1.0.5] 2024-12-08
 

diff --git a/lexical-parse-float/src/binary.rs b/lexical-parse-float/src/binary.rs
@@ -149,7 +149,6 @@ pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
 ///
 /// This avoids the need for arbitrary-precision arithmetic, since the result
 /// will always be a near-halfway representation where rounded-down it's even.
-#[cfg_attr(not(feature = "compact"), inline(always))]
 pub fn slow_binary<F: RawFloat, const FORMAT: u128>(num: Number) -> ExtendedFloat80 {
     let format = NumberFormat::<{ FORMAT }> {};
     let radix = format.radix();

diff --git a/lexical-parse-float/src/lemire.rs b/lexical-parse-float/src/lemire.rs
@@ -51,6 +51,7 @@ pub fn lemire<F: LemireFloat>(num: &Number, lossy: bool) -> ExtendedFloat80 {
 /// at a Gigabyte per Second" in section 5, "Fast Algorithm", and
 /// section 6, "Exact Numbers And Ties", available online:
 /// <https://arxiv.org/abs/2101.11408.pdf>.
+#[inline]
 #[must_use]
 #[allow(clippy::missing_inline_in_public_items)] // reason="public for testing only"
 pub fn compute_float<F: LemireFloat>(q: i64, mut w: u64, lossy: bool) -> ExtendedFloat80 {
@@ -201,6 +202,7 @@ const fn full_multiplication(a: u64, b: u64) -> (u64, u64) {
 // 64-bit words approximating the result, with the "high" part corresponding to
 // the most significant bits and the low part corresponding to the least
 // significant bits.
+#[inline]
 fn compute_product_approx(q: i64, w: u64, precision: usize) -> (u64, u64) {
     debug_assert!(q >= SMALLEST_POWER_OF_FIVE as i64, "must be within our required pow5 range");
     debug_assert!(q <= LARGEST_POWER_OF_FIVE as i64, "must be within our required pow5 range");

diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs
@@ -238,6 +238,7 @@ macro_rules! to_native {
 }
 
 /// Parse a float from bytes using a complete parser.
+#[inline(always)]
 #[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
 pub fn parse_complete<F: LemireFloat, const FORMAT: u128>(
     bytes: &[u8],
@@ -280,6 +281,7 @@ pub fn parse_complete<F: LemireFloat, const FORMAT: u128>(
 }
 
 /// Parse a float using only the fast path as a complete parser.
+#[inline(always)]
 #[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
 pub fn fast_path_complete<F: LemireFloat, const FORMAT: u128>(
     bytes: &[u8],
@@ -304,6 +306,7 @@ pub fn fast_path_complete<F: LemireFloat, const FORMAT: u128>(
 }
 
 /// Parse a float from bytes using a partial parser.
+#[inline(always)]
 #[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
 pub fn parse_partial<F: LemireFloat, const FORMAT: u128>(
     bytes: &[u8],
@@ -352,6 +355,7 @@ pub fn parse_partial<F: LemireFloat, const FORMAT: u128>(
 }
 
 /// Parse a float using only the fast path as a partial parser.
+#[inline(always)]
 #[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
 pub fn fast_path_partial<F: LemireFloat, const FORMAT: u128>(
     bytes: &[u8],
@@ -825,6 +829,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
     ))
 }
 
+#[inline(always)]
 pub fn parse_partial_number<'a, const FORMAT: u128>(
     byte: Bytes<'a, FORMAT>,
     is_negative: bool,

diff --git a/lexical-parse-float/src/slow.rs b/lexical-parse-float/src/slow.rs
@@ -45,7 +45,6 @@ use crate::shared;
 /// to `16777216.0`. These near-halfway conversions therefore may require
 /// a large number of digits to unambiguously determine how to round.
 #[must_use]
-#[inline(always)]
 #[allow(clippy::unwrap_used)] // reason = "none is a developer error"
 pub fn slow_radix<F: RawFloat, const FORMAT: u128>(
     num: Number,
@@ -119,6 +118,7 @@ pub fn digit_comp<F: RawFloat, const FORMAT: u128>(
 /// Generate the significant digits with a positive exponent relative to
 /// mantissa.
 #[must_use]
+#[inline(always)]
 #[allow(clippy::unwrap_used)] // reason = "none is a developer error"
 #[allow(clippy::cast_possible_wrap)] // reason = "can't wrap in practice: max is ~1000 limbs"
 #[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
@@ -174,6 +174,8 @@ pub fn positive_digit_comp<F: RawFloat, const FORMAT: u128>(
 ///
 /// This allows us to compare both floats using integers efficiently
 /// without any loss of precision.
+#[must_use]
+#[inline(always)]
 #[allow(clippy::match_bool)] // reason = "simplifies documentation"
 #[allow(clippy::unwrap_used)] // reason = "unwrap panics if a developer error"
 #[allow(clippy::comparison_chain)] // reason = "logically different conditions for algorithm"