Add in spellchecking.

Fix the documentation for many steps and document using spellcheckers in the development guide.
Alexhuszagh · Dec 3, 2024 · 9c52945 · 9c52945
1 parent 2617dd9
commit 9c52945
Show file tree

Hide file tree

Showing 43 changed files with 442 additions and 232 deletions.
diff --git a/.gitignore b/.gitignore
@@ -32,3 +32,6 @@ cobertura.xml
 # Fuzzing data
 corpus/
 artifacts/
+
+# Spellchecking files
+spellcheck.txt
diff --git a/Cargo.toml b/Cargo.toml
@@ -9,6 +9,9 @@ members = [
 ]
 resolver = "2"
 
+[workspace.metadata.spellcheck]
+config = "spellcheck.toml"
+
 [profile.dev]
 opt-level = 0
 debug = true

diff --git a/docs/Development.md b/docs/Development.md
@@ -53,6 +53,10 @@ cargo +nightly install cargo-fuzz
 # which uses a deprecated binary that requires an old nightly to
 # install.
 cargo +nightly install cargo-count --git https://github.com/kbknapp/cargo-count --rev eebe6f8 --locked
+
+# Only if editing doc comments. This requires a Linux or macOS install.
+# On Ubuntu, the packages `libclang-dev` and `llvm` are required.
+cargo install cargo-spellcheck
 ```
 
 In addition, the following non-Rust dependencies must be installed:
@@ -117,3 +121,7 @@ Each workspace has a "docs" directory containing detailed descriptions of algori
 ## Pitfalls
 
 **ALWAYS** benchmark, even for trivial changes. I've been burned many times by `#[cfg(...)]` being way faster than `if cfg!()`, which you would think both would be eliminated during optimization, just one during the first stage of compilation. It's better to confirm than assume. This is a nightmare development-wise because of how many features we support but there's not many alternatives: it seems it doesn't entirely remove code as if by tree-shaking which can majorly impact performance.
+
+## Documentation
+
+If making significant changes to the documentation, running the spellchecker can be useful. Remember these are **guidelines** and anything inside `libm.rs` should be ignored. To check the spelling, run `cargo spellcheck check`.
diff --git a/lexical-core/src/lib.rs b/lexical-core/src/lib.rs
@@ -185,11 +185,11 @@
 //! #### safe
 //!
 //! This replaces most unchecked indexing, required in cases where the
-//! compiler cannot ellide the check, with checked indexing. However,
+//! compiler cannot elide the check, with checked indexing. However,
 //! it does not fully replace all unsafe behavior with safe behavior.
-//! To minimize the risk of UB and out-of-bounds reads/writers, extensive
-//! edge-cases, property-based tests, and fuzzing is done with both the
-//! safe feature enabled and disabled, with the tests verified by Miri
+//! To minimize the risk of undefined behavior and out-of-bounds reads/writes,
+//! extensive edge-cases, property-based tests, and fuzzing is done with both
+//! the safe feature enabled and disabled, with the tests verified by Miri
 //! and Valgrind.
 //!
 //! # Configuration API

diff --git a/lexical-parse-float/etc/correctness/test-parse-random/validate.rs b/lexical-parse-float/etc/correctness/test-parse-random/validate.rs
@@ -42,7 +42,7 @@ pub struct Constants {
     ///
     /// This is a mapping from integers to half the precision available at that exponent. In other
     /// words, `0.5 * 2^n` = `2^(n-1)`, which is half the distance between `m * 2^n` and
-    /// `(m + 1) * 2^n`, m ∈ ℤ.
+    /// `(m + 1) * 2^n`, `m ∈ ℤ`.
     ///
     /// So, this is the maximum error from a real number to its floating point representation,
     /// assuming the float type can represent the exponent.

diff --git a/lexical-parse-float/src/api.rs b/lexical-parse-float/src/api.rs
@@ -19,7 +19,7 @@ const DEFAULT_OPTIONS: Options = Options::new();
 
 /// Implement `FromLexical` for numeric type.
 ///
-/// Need to inline these, otherwise codegen is suboptimal.
+/// Need to inline these, otherwise code generation is sub-optimal.
 /// For some reason, it can't determine some of the const evaluations
 /// can actually be evaluated at compile-time, which causes major branching
 /// issues.

diff --git a/lexical-parse-float/src/bellerophon.rs b/lexical-parse-float/src/bellerophon.rs
@@ -160,7 +160,7 @@ pub fn bellerophon<F: RawFloat, const FORMAT: u128>(num: &Number, lossy: bool) -
 // Specifically, we want to know if we are close to a halfway representation,
 // or halfway between `b` and `b+1`, or `b+h`. The halfway representation
 // has the form:
-//     SEEEEEEEHMMMMMMMMMMMMMMMMMMMMMMM100...
+//     `SEEEEEEEHMMMMMMMMMMMMMMMMMMMMMMM100...`
 // where:
 //     S = Sign Bit
 //     E = Exponent Bits
@@ -196,7 +196,7 @@ fn error_is_accurate<F: RawFloat>(errors: u32, fp: &ExtendedFloat80) -> bool {
     // See the documentation for dtoa for more information.
 
     // This is always a valid u32, since `fp.exp >= -64`
-    // will always be positive and the significand size is {23, 52}.
+    // will always be positive and the significand size is `{23, 52}`.
     let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;
 
     // The unbiased exponent checks is `unbiased_exp <= F::MANTISSA_SIZE
@@ -293,7 +293,7 @@ fn error_is_accurate<F: RawFloat>(errors: u32, fp: &ExtendedFloat80) -> bool {
 #[cfg_attr(not(feature = "compact"), inline(always))]
 pub fn normalize(fp: &mut ExtendedFloat80) -> i32 {
     // Note:
-    // Using the ctlz intrinsic via leading_zeros is way faster (~10x)
+    // Using the ctlz intrinsic via `leading_zeros` is way faster (~10x)
     // than shifting 1-bit at a time, via while loop, and also way
     // faster (~2x) than an unrolled loop that checks at 32, 16, 4,
     // 2, and 1 bit.
@@ -304,7 +304,7 @@ pub fn normalize(fp: &mut ExtendedFloat80) -> i32 {
     // code as it removes conditional logic.
 
     // Calculate the number of leading zeros, and then zero-out
-    // any overflowing bits, to avoid shl overflow when self.mant == 0.
+    // any overflowing bits, to avoid shl overflow when `self.mant == 0`.
     if fp.mant != 0 {
         let shift = fp.mant.leading_zeros() as i32;
         fp.mant <<= shift;
@@ -358,7 +358,7 @@ pub fn mul(x: &ExtendedFloat80, y: &ExtendedFloat80) -> ExtendedFloat80 {
 // POWERS
 // ------
 
-/// Precalculated powers of base N for the Bellerophon algorithm.
+/// Pre-calculated powers of base N for the Bellerophon algorithm.
 pub struct BellerophonPowers {
     // Pre-calculated small powers.
     pub small: &'static [u64],
@@ -370,9 +370,9 @@ pub struct BellerophonPowers {
     pub step: i32,
     // Exponent bias for the large powers.
     pub bias: i32,
-    /// ceil(log2(radix)) scaled as a multiplier.
+    /// `ceil(log2(radix))` scaled as a multiplier.
     pub log2: i64,
-    /// Bitshift for the log2 multiplier.
+    /// Bit shift for the log2 multiplier.
     pub log2_shift: i32,
 }
 

diff --git a/lexical-parse-float/src/bigint.rs b/lexical-parse-float/src/bigint.rs
@@ -1,6 +1,6 @@
 //! A simple big-integer type for slow path algorithms.
 //!
-//! This includes minimal stackvector for use in big-integer arithmetic.
+//! This includes a minimal stack vector for use in big-integer arithmetic.
 
 #![doc(hidden)]
 
@@ -996,10 +996,10 @@ pub const fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) {
 ///
 /// Even using worst-case scenarios, exponentiation by squaring is
 /// significantly slower for our workloads. Just multiply by small powers,
-/// in simple cases, and use precalculated large powers in other cases.
+/// in simple cases, and use pre-calculated large powers in other cases.
 ///
 /// Furthermore, using sufficiently big large powers is also crucial for
-/// performance. This is a tradeoff of binary size and performance, and
+/// performance. This is a trade-off of binary size and performance, and
 /// using a single value at ~`5^(5 * max_exp)` seems optimal.
 #[allow(clippy::doc_markdown)] // reason="not attempted to be referencing items"
 #[allow(clippy::missing_inline_in_public_items)] // reason="only public for testing"
@@ -1116,23 +1116,23 @@ pub fn large_add_from<const SIZE: usize>(
     y: &[Limb],
     start: usize,
 ) -> Option<()> {
-    // The effective x buffer is from `xstart..x.len()`, so we need to treat
-    // that as the current range. If the effective y buffer is longer, need
+    // The effective `x` buffer is from `xstart..x.len()`, so we need to treat
+    // that as the current range. If the effective `y` buffer is longer, need
     // to resize to that, + the start index.
     if y.len() > x.len().saturating_sub(start) {
         // Ensure we panic if we can't extend the buffer.
         // This avoids any unsafe behavior afterwards.
         x.try_resize(y.len() + start, 0)?;
     }
 
-    // Iteratively add elements from y to x.
+    // Iteratively add elements from `y` to `x`.
     let mut carry = false;
     for index in 0..y.len() {
         let xi = &mut x[start + index];
         let yi = y[index];
 
         // Only one op of the two ops can overflow, since we added at max
-        // Limb::max_value() + Limb::max_value(). Add the previous carry,
+        // `Limb::max_value() + Limb::max_value()`. Add the previous carry,
         // and store the current carry for the next.
         let result = scalar_add(*xi, yi);
         *xi = result.0;
@@ -1432,7 +1432,7 @@ pub fn shl<const SIZE: usize>(x: &mut StackVec<SIZE>, n: usize) -> Option<()> {
 #[inline(always)]
 pub fn leading_zeros(x: &[Limb]) -> u32 {
     let length = x.len();
-    // wrapping_sub is fine, since it'll just return None.
+    // `wrapping_sub` is fine, since it'll just return None.
     if let Some(&value) = x.get(length.wrapping_sub(1)) {
         value.leading_zeros()
     } else {

diff --git a/lexical-parse-float/src/binary.rs b/lexical-parse-float/src/binary.rs
@@ -46,7 +46,7 @@ pub fn binary<F: RawFloat, const FORMAT: u128>(num: &Number, lossy: bool) -> Ext
     // is not a power-of-two. If it's odd and we're at halfway, we'll
     // always round-up **anyway**.
     //
-    // We need to check the truncated bits are equal to 0b100000....,
+    // We need to check the truncated bits are equal to `0b100000....`,
     // if it's above that, always round-up. If it's odd, we can always
     // disambiguate the float. If it's even, and exactly halfway, this
     // step fails.
@@ -96,7 +96,7 @@ pub fn binary<F: RawFloat, const FORMAT: u128>(num: &Number, lossy: bool) -> Ext
 ///
 /// We're guaranteed to have a large number of digits here
 /// (in general, 20+ or much higher), due to how close we
-/// are to a halfway representation, so an uncheced loop
+/// are to a halfway representation, so an unchecked loop
 /// optimization isn't worth it.
 #[cfg_attr(not(feature = "compact"), inline(always))]
 #[allow(unused_mut)]

diff --git a/lexical-parse-float/src/float.rs b/lexical-parse-float/src/float.rs
@@ -117,22 +117,22 @@ impl RawFloat for bf16 {
 /// algorithm.
 pub trait LemireFloat: RawFloat {
     // Round-to-even only happens for negative values of q
-    // when q ≥ −4 in the 64-bit case and when q ≥ −17 in
+    // when `q ≥ −4` in the 64-bit case and when `q ≥ −17` in
     // the 32-bit case.
     //
-    // When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
-    // have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
-    // 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
+    // When `q ≥ 0`, we have that `5^q ≤ 2m+1`. In the 64-bit case, we
+    // have `5^q ≤ 2m+1 ≤ 2^54` or `q ≤ 23`. In the 32-bit case, we have
+    // `5^q ≤ 2m+1 ≤ 2^25` or `q ≤ 10`.
     //
-    // When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
-    // so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
-    // or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
-    // (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
-    // or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
+    // When `q < 0`, we have `w ≥ (2m+1)×5^−q`. We must have that `w < 2^64`
+    // so `(2m+1)×5^−q < 2^64`. We have that `2m+1 > 2^53` (64-bit case)
+    // or `2m+1 > 2^24` (32-bit case). Hence, we must have `2^53×5^−q < 2^64`
+    // (64-bit) and `2^24×5^−q < 2^64` (32-bit). Hence we have `5^−q < 2^11`
+    // or `q ≥ −4` (64-bit case) and `5^−q < 2^40` or `q ≥ −17` (32-bit case).
     //
     // Thus we have that we only need to round ties to even when
-    // we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
-    // (in the 32-bit case). In both cases,the power of five(5^|q|)
+    // we have that `q ∈ [−4,23]` (in the 64-bit case) or `q ∈ [−17,10]`
+    // (in the 32-bit case). In both cases, the power of five (`5^|q|`)
     // fits in a 64-bit word.
     const MIN_EXPONENT_ROUND_TO_EVEN: i32;
     const MAX_EXPONENT_ROUND_TO_EVEN: i32;

diff --git a/lexical-parse-float/src/fpu.rs b/lexical-parse-float/src/fpu.rs
@@ -19,7 +19,7 @@ pub use fpu_precision::set_precision;
 #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
 mod fpu_precision {
     // We only support the latest nightly, which is 1.59+.
-    // THe `asm!` macro was stabilized in 1.59.0.
+    // The `asm!` macro was stabilized in 1.59.0.
     use core::arch::asm;
     use core::mem::size_of;
 

diff --git a/lexical-parse-float/src/lemire.rs b/lexical-parse-float/src/lemire.rs
@@ -74,16 +74,16 @@ pub fn compute_float<F: LemireFloat>(q: i64, mut w: u64, lossy: bool) -> Extende
     w <<= lz;
     let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_SIZE as usize + 3);
     if !lossy && lo == 0xFFFF_FFFF_FFFF_FFFF {
-        // If we have failed to approximate w x 5^-q with our 128-bit value.
+        // If we have failed to approximate `w x 5^-q` with our 128-bit value.
         // Since the addition of 1 could lead to an overflow which could then
         // round up over the half-way point, this can lead to improper rounding
         // of a float.
         //
-        // However, this can only occur if q ∈ [-27, 55]. The upper bound of q
-        // is 55 because 5^55 < 2^128, however, this can only happen if 5^q > 2^64,
+        // However, this can only occur if `q ∈ [-27, 55]`. The upper bound of q
+        // is 55 because `5^55 < 2^128`, however, this can only happen if `5^q > 2^64`,
         // since otherwise the product can be represented in 64-bits, producing
         // an exact result. For negative exponents, rounding-to-even can
-        // only occur if 5^-q < 2^64.
+        // only occur if `5^-q < 2^64`.
         //
         // For detailed explanations of rounding for negative exponents, see
         // <https://arxiv.org/pdf/2101.11408.pdf#section.9.1>. For detailed
@@ -117,7 +117,7 @@ pub fn compute_float<F: LemireFloat>(q: i64, mut w: u64, lossy: bool) -> Extende
     // need to round down.
     //
     // This will only occur if:
-    //  1. The lower 64 bits of the 128-bit representation is 0. IE, 5^q fits in
+    //  1. The lower 64 bits of the 128-bit representation is 0. IE, `5^q` fits in
     //     single 64-bit word.
     //  2. The least-significant bit prior to truncated mantissa is odd.
     //  3. All the bits truncated when shifting to mantissa bits + 1 are 0.
@@ -197,10 +197,10 @@ const fn full_multiplication(a: u64, b: u64) -> (u64, u64) {
     (r as u64, (r >> 64) as u64)
 }
 
-// This will compute or rather approximate w * 5**q and return a pair of 64-bit
-// words approximating the result, with the "high" part corresponding to the
-// most significant bits and the low part corresponding to the least significant
-// bits.
+// This will compute or rather approximate `w * 5**q` and return a pair of
+// 64-bit words approximating the result, with the "high" part corresponding to
+// the most significant bits and the low part corresponding to the least
+// significant bits.
 fn compute_product_approx(q: i64, w: u64, precision: usize) -> (u64, u64) {
     debug_assert!(q >= SMALLEST_POWER_OF_FIVE as i64, "must be within our required pow5 range");
     debug_assert!(q <= LARGEST_POWER_OF_FIVE as i64, "must be within our required pow5 range");
@@ -212,7 +212,7 @@ fn compute_product_approx(q: i64, w: u64, precision: usize) -> (u64, u64) {
         0xFFFF_FFFF_FFFF_FFFF_u64
     };
 
-    // 5^q < 2^64, then the multiplication always provides an exact value.
+    // `5^q < 2^64`, then the multiplication always provides an exact value.
     // That means whenever we need to round ties to even, we always have
     // an exact value.
     let index = (q - SMALLEST_POWER_OF_FIVE as i64) as usize;

diff --git a/lexical-parse-float/src/libm.rs b/lexical-parse-float/src/libm.rs
@@ -391,7 +391,7 @@ pub fn sqrtf(x: f32) -> f32 {
     {
         // Note: This path is unlikely since LLVM will usually have already
         // optimized sqrt calls into hardware instructions if sse is available,
-        // but if someone does end up here they'll apprected the speed increase.
+        // but if someone does end up here they'll appreciate the speed increase.
         #[cfg(target_arch = "x86")]
         use core::arch::x86::*;
         #[cfg(target_arch = "x86_64")]
@@ -1068,7 +1068,7 @@ pub fn sqrtd(x: f64) -> f64 {
     {
         // Note: This path is unlikely since LLVM will usually have already
         // optimized sqrt calls into hardware instructions if sse2 is available,
-        // but if someone does end up here they'll apprected the speed increase.
+        // but if someone does end up here they'll appreciate the speed increase.
         #[cfg(target_arch = "x86")]
         use core::arch::x86::*;
         #[cfg(target_arch = "x86_64")]

diff --git a/lexical-parse-float/src/limits.rs b/lexical-parse-float/src/limits.rs
@@ -766,8 +766,8 @@ pub const fn u64_power_limit(radix: u32) -> u32 {
 ///  > finite number of digits is that β should divide an integer power of γ.
 ///
 /// According to the "Handbook of Floating Point Arithmetic",
-/// for IEEE754, with emin being the min exponent, p2 being the
-/// precision, and b being the radix, the number of digits follows as:
+/// for IEEE754, with `emin` being the min exponent, `p2` being the
+/// precision, and `b` being the radix, the number of digits follows as:
 ///
 /// `−emin + p2 + ⌊(emin + 1) log(2, b) − log(1 − 2^(−p2), b)⌋`
 ///
@@ -900,8 +900,8 @@ impl MaxDigits for bf16 {
     }
 }
 
-///// emin = -16382
-///// p2 = 113
+///// `emin = -16382`
+///// `p2 = 113`
 //#[cfg(feature = "f128")]
 //impl MaxDigits for f128 {
 //    #[inline(always)]

diff --git a/lexical-parse-float/src/options.rs b/lexical-parse-float/src/options.rs
@@ -575,7 +575,7 @@ pub const RUBY_LITERAL: Options = Options::builder()
 const_assert!(RUBY_LITERAL.is_valid());
 
 /// Number format to parse a `Ruby` float from string.
-/// `Ruby` can write NaN and Infinity as strings, but won't roundtrip them back to floats.
+/// `Ruby` can write NaN and Infinity as strings, but won't round-trip them back to floats.
 #[rustfmt::skip]
 pub const RUBY_STRING: Options = Options::builder()
         .nan_string(options::RUBY_STRING_NONE)