diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8e8272c1..ad26f617 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -75,36 +75,36 @@ jobs:
           --feature-powerset --exclude-features nightly,generic_const_exprs \
           --depth 1
 
-  codecov:
-    # See <https://doc.rust-lang.org/nightly/unstable-book/compiler-flags/source-based-code-coverage.html>
-    name: Coverage
-    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    steps:
-      - uses: actions/checkout@v4
-      - uses: dtolnay/rust-toolchain@nightly
-        with:
-          components: llvm-tools-preview
-      - uses: Swatinem/rust-cache@v2
-      - uses: taiki-e/install-action@cargo-llvm-cov
-      - name: Generate code coverage
-        env:
-          # Covered tests are slower, so reduce the test load. (We also shouldn't
-          # depend too much on case generation for coverage).
-          PROPTEST_CASES: 5
-        run: |
-          # Generate profiles, but do not merge them
-          cargo llvm-cov --no-report --workspace --all-features --all-targets -- --nocapture
-          cargo llvm-cov --no-report --workspace --all-features --doc -- --nocapture
-      - name: Merge profiles
-        run: cargo llvm-cov --no-run --lcov --output-path lcov.info
-      - name: Submit to codecov.io
-        uses: codecov/codecov-action@v3
-        with:
-          token: ${{ secrets.CODECOV_TOKEN }} # Optional for public repos
-          files: lcov.info
-          fail_ci_if_error: true
-          verbose: true
+  # codecov:
+  #   # See <https://doc.rust-lang.org/nightly/unstable-book/compiler-flags/source-based-code-coverage.html>
+  #   name: Coverage
+  #   runs-on: ubuntu-latest
+  #   timeout-minutes: 30
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #     - uses: dtolnay/rust-toolchain@nightly
+  #       with:
+  #         components: llvm-tools-preview
+  #     - uses: Swatinem/rust-cache@v2
+  #     - uses: taiki-e/install-action@cargo-llvm-cov
+  #     - name: Generate code coverage
+  #       env:
+  #         # Covered tests are slower, so reduce the test load. (We also shouldn't
+  #         # depend too much on case generation for coverage).
+  #         PROPTEST_CASES: 5
+  #       run: |
+  #         # Generate profiles, but do not merge them
+  #         cargo llvm-cov --no-report --workspace --all-features --all-targets -- --nocapture
+  #         cargo llvm-cov --no-report --workspace --all-features --doc -- --nocapture
+  #     - name: Merge profiles
+  #       run: cargo llvm-cov --no-run --lcov --output-path lcov.info
+  #     - name: Submit to codecov.io
+  #       uses: codecov/codecov-action@v3
+  #       with:
+  #         token: ${{ secrets.CODECOV_TOKEN }} # Optional for public repos
+  #         files: lcov.info
+  #         fail_ci_if_error: true
+  #         verbose: true
 
   clippy:
     name: Clippy
diff --git a/deny.toml b/deny.toml
index caba8a08..fb3c29ee 100644
--- a/deny.toml
+++ b/deny.toml
@@ -26,7 +26,8 @@ allow = [
     "Unicode-DFS-2016",
     "Unlicense",
     "MPL-2.0",
-    "CC0-1.0"
+    "CC0-1.0",
+    "Unicode-3.0",
 ]
 
 [sources]
diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs
index 69e09e2a..f056e405 100644
--- a/src/algorithms/mod.rs
+++ b/src/algorithms/mod.rs
@@ -20,7 +20,7 @@ pub use self::{
     add::{adc_n, sbb_n},
     div::div,
     gcd::{gcd, gcd_extended, inv_mod, LehmerMatrix},
-    mul::{add_nx1, addmul, addmul_n, addmul_nx1, addmul_ref, mul_nx1, submul_nx1},
+    mul::{add_nx1, addmul, addmul_n, addmul_nx1, mul_nx1, submul_nx1},
     ops::{adc, sbb},
     shift::{shift_left_small, shift_right_small},
 };
diff --git a/src/algorithms/mul.rs b/src/algorithms/mul.rs
index 2c5436ed..eff10c6d 100644
--- a/src/algorithms/mul.rs
+++ b/src/algorithms/mul.rs
@@ -2,46 +2,6 @@
 
 use crate::algorithms::{ops::sbb, DoubleWord};
 
-#[inline]
-#[allow(clippy::cast_possible_truncation)] // Intentional truncation.
-#[allow(dead_code)] // Used for testing
-pub fn addmul_ref(result: &mut [u64], a: &[u64], b: &[u64]) -> bool {
-    let mut overflow = 0;
-    for (i, a) in a.iter().copied().enumerate() {
-        let mut result = result.iter_mut().skip(i);
-        let mut b = b.iter().copied();
-        let mut carry = 0_u128;
-        loop {
-            match (result.next(), b.next()) {
-                // Partial product.
-                (Some(result), Some(b)) => {
-                    carry += u128::from(*result) + u128::from(a) * u128::from(b);
-                    *result = carry as u64;
-                    carry >>= 64;
-                }
-                // Carry propagation.
-                (Some(result), None) => {
-                    carry += u128::from(*result);
-                    *result = carry as u64;
-                    carry >>= 64;
-                }
-                // Excess product.
-                (None, Some(b)) => {
-                    carry += u128::from(a) * u128::from(b);
-                    overflow |= carry as u64;
-                    carry >>= 64;
-                }
-                // Fin.
-                (None, None) => {
-                    break;
-                }
-            }
-        }
-        overflow |= carry as u64;
-    }
-    overflow != 0
-}
-
 /// ⚠️ Computes `result += a * b` and checks for overflow.
 ///
 /// **Warning.** This function is not part of the stable API.
@@ -62,7 +22,7 @@ pub fn addmul_ref(result: &mut [u64], a: &[u64], b: &[u64]) -> bool {
 /// assert_eq!(overflow, false);
 /// assert_eq!(result, [12]);
 /// ```
-#[inline]
+#[inline(always)]
 pub fn addmul(mut lhs: &mut [u64], mut a: &[u64], mut b: &[u64]) -> bool {
     // Trim zeros from `a`
     while let [0, rest @ ..] = a {
@@ -116,15 +76,13 @@ pub fn addmul(mut lhs: &mut [u64], mut a: &[u64], mut b: &[u64]) -> bool {
 }
 
 /// Computes `lhs += a` and returns the carry.
-#[inline]
+#[inline(always)]
 pub fn add_nx1(lhs: &mut [u64], mut a: u64) -> u64 {
     if a == 0 {
         return 0;
     }
     for lhs in lhs {
-        let sum = u128::add(*lhs, a);
-        *lhs = sum.low();
-        a = sum.high();
+        (*lhs, a) = u128::add(*lhs, a).split();
         if a == 0 {
             return 0;
         }
@@ -147,18 +105,16 @@ pub fn addmul_n(lhs: &mut [u64], a: &[u64], b: &[u64]) {
         2 => addmul_2(lhs, a, b),
         3 => addmul_3(lhs, a, b),
         4 => addmul_4(lhs, a, b),
-        _ => {
-            let _ = addmul(lhs, a, b);
-        }
+        _ => _ = addmul(lhs, a, b),
     }
 }
 
 /// Computes `lhs += a * b` for 1 limb.
 #[inline(always)]
 fn addmul_1(lhs: &mut [u64], a: &[u64], b: &[u64]) {
-    assert_eq!(lhs.len(), 1);
-    assert_eq!(a.len(), 1);
-    assert_eq!(b.len(), 1);
+    assume!(lhs.len() == 1);
+    assume!(a.len() == 1);
+    assume!(b.len() == 1);
 
     mac(&mut lhs[0], a[0], b[0], 0);
 }
@@ -166,9 +122,9 @@ fn addmul_1(lhs: &mut [u64], a: &[u64], b: &[u64]) {
 /// Computes `lhs += a * b` for 2 limbs.
 #[inline(always)]
 fn addmul_2(lhs: &mut [u64], a: &[u64], b: &[u64]) {
-    assert_eq!(lhs.len(), 2);
-    assert_eq!(a.len(), 2);
-    assert_eq!(b.len(), 2);
+    assume!(lhs.len() == 2);
+    assume!(a.len() == 2);
+    assume!(b.len() == 2);
 
     let carry = mac(&mut lhs[0], a[0], b[0], 0);
     mac(&mut lhs[1], a[0], b[1], carry);
@@ -179,9 +135,9 @@ fn addmul_2(lhs: &mut [u64], a: &[u64], b: &[u64]) {
 /// Computes `lhs += a * b` for 3 limbs.
 #[inline(always)]
 fn addmul_3(lhs: &mut [u64], a: &[u64], b: &[u64]) {
-    assert_eq!(lhs.len(), 3);
-    assert_eq!(a.len(), 3);
-    assert_eq!(b.len(), 3);
+    assume!(lhs.len() == 3);
+    assume!(a.len() == 3);
+    assume!(b.len() == 3);
 
     let carry = mac(&mut lhs[0], a[0], b[0], 0);
     let carry = mac(&mut lhs[1], a[0], b[1], carry);
@@ -196,9 +152,9 @@ fn addmul_3(lhs: &mut [u64], a: &[u64], b: &[u64]) {
 /// Computes `lhs += a * b` for 4 limbs.
 #[inline(always)]
 fn addmul_4(lhs: &mut [u64], a: &[u64], b: &[u64]) {
-    assert_eq!(lhs.len(), 4);
-    assert_eq!(a.len(), 4);
-    assert_eq!(b.len(), 4);
+    assume!(lhs.len() == 4);
+    assume!(a.len() == 4);
+    assume!(b.len() == 4);
 
     let carry = mac(&mut lhs[0], a[0], b[0], 0);
     let carry = mac(&mut lhs[1], a[0], b[1], carry);
@@ -223,13 +179,11 @@ fn mac(lhs: &mut u64, a: u64, b: u64, c: u64) -> u64 {
 }
 
 /// Computes `lhs *= a` and returns the carry.
-#[inline]
+#[inline(always)]
 pub fn mul_nx1(lhs: &mut [u64], a: u64) -> u64 {
     let mut carry = 0;
-    for lhs in &mut *lhs {
-        let product = u128::muladd(*lhs, a, carry);
-        *lhs = product.low();
-        carry = product.high();
+    for lhs in lhs {
+        (*lhs, carry) = u128::muladd(*lhs, a, carry).split();
     }
     carry
 }
@@ -244,14 +198,12 @@ pub fn mul_nx1(lhs: &mut [u64], a: u64) -> u64 {
 /// \\\\ \mathsf{carry} &= \floor{\frac{\mathsf{lhs} + \mathsf{a} ⋅ \mathsf{b}
 /// }{2^{64⋅N}}} \end{aligned}
 /// $$
-#[inline]
+#[inline(always)]
 pub fn addmul_nx1(lhs: &mut [u64], a: &[u64], b: u64) -> u64 {
-    debug_assert_eq!(lhs.len(), a.len());
+    assert_eq!(lhs.len(), a.len()); // Safe `pub fn`: a mismatch must panic, not be UB.
     let mut carry = 0;
-    for (lhs, a) in lhs.iter_mut().zip(a.iter().copied()) {
-        let product = u128::muladd2(a, b, carry, *lhs);
-        *lhs = product.low();
-        carry = product.high();
+    for i in 0..a.len() {
+        (lhs[i], carry) = u128::muladd2(a[i], b, carry, lhs[i]).split();
     }
     carry
 }
@@ -267,23 +219,18 @@ pub fn addmul_nx1(lhs: &mut [u64], a: &[u64], b: u64) -> u64 {
 /// \mathsf{lhs}}{2^{64⋅N}}} \end{aligned}
 /// $$
 // OPT: `carry` and `borrow` can probably be merged into a single var.
-#[inline]
+#[inline(always)]
 pub fn submul_nx1(lhs: &mut [u64], a: &[u64], b: u64) -> u64 {
-    debug_assert_eq!(lhs.len(), a.len());
+    assert_eq!(lhs.len(), a.len()); // Safe `pub fn`: a mismatch must panic, not be UB.
     let mut carry = 0;
     let mut borrow = 0;
-    for (lhs, a) in lhs.iter_mut().zip(a.iter().copied()) {
+    for i in 0..a.len() {
         // Compute product limbs
-        let limb = {
-            let product = u128::muladd(a, b, carry);
-            carry = product.high();
-            product.low()
-        };
+        let limb;
+        (limb, carry) = u128::muladd(a[i], b, carry).split();
 
         // Subtract
-        let (new, b) = sbb(*lhs, limb, borrow);
-        *lhs = new;
-        borrow = b;
+        (lhs[i], borrow) = sbb(lhs[i], limb, borrow);
     }
     borrow + carry
 }
@@ -293,6 +240,44 @@ mod tests {
     use super::*;
     use proptest::{collection, num::u64, proptest};
 
+    #[allow(clippy::cast_possible_truncation)] // Intentional truncation.
+    fn addmul_ref(result: &mut [u64], a: &[u64], b: &[u64]) -> bool {
+        let mut overflow = 0;
+        for (i, a) in a.iter().copied().enumerate() {
+            let mut result = result.iter_mut().skip(i);
+            let mut b = b.iter().copied();
+            let mut carry = 0_u128;
+            loop {
+                match (result.next(), b.next()) {
+                    // Partial product.
+                    (Some(result), Some(b)) => {
+                        carry += u128::from(*result) + u128::from(a) * u128::from(b);
+                        *result = carry as u64;
+                        carry >>= 64;
+                    }
+                    // Carry propagation.
+                    (Some(result), None) => {
+                        carry += u128::from(*result);
+                        *result = carry as u64;
+                        carry >>= 64;
+                    }
+                    // Excess product.
+                    (None, Some(b)) => {
+                        carry += u128::from(a) * u128::from(b);
+                        overflow |= carry as u64;
+                        carry >>= 64;
+                    }
+                    // Fin.
+                    (None, None) => {
+                        break;
+                    }
+                }
+            }
+            overflow |= carry as u64;
+        }
+        overflow != 0
+    }
+
     #[test]
     fn test_addmul() {
         let any_vec = collection::vec(u64::ANY, 0..10);
diff --git a/src/bits.rs b/src/bits.rs
index eb2a40b5..d6a88c6c 100644
--- a/src/bits.rs
+++ b/src/bits.rs
@@ -254,53 +254,29 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     /// the shift is larger than `BITS` (which is IMHO not very useful).
     #[inline]
     #[must_use]
-    pub fn overflowing_shl(mut self, rhs: usize) -> (Self, bool) {
+    pub fn overflowing_shl(self, rhs: usize) -> (Self, bool) {
         let (limbs, bits) = (rhs / 64, rhs % 64);
         if limbs >= LIMBS {
             return (Self::ZERO, self != Self::ZERO);
         }
-        if bits == 0 {
-            // Check for overflow
-            let mut overflow = false;
-            for i in (LIMBS - limbs)..LIMBS {
-                overflow |= self.limbs[i] != 0;
-            }
-            if self.limbs[LIMBS - limbs - 1] > Self::MASK {
-                overflow = true;
-            }
-
-            // Shift
-            for i in (limbs..LIMBS).rev() {
-                assume!(i >= limbs && i - limbs < LIMBS);
-                self.limbs[i] = self.limbs[i - limbs];
-            }
-            self.limbs[..limbs].fill(0);
-            self.limbs[LIMBS - 1] &= Self::MASK;
-            return (self, overflow);
-        }
 
-        // Check for overflow
-        let mut overflow = false;
-        for i in (LIMBS - limbs)..LIMBS {
-            overflow |= self.limbs[i] != 0;
-        }
-        if self.limbs[LIMBS - limbs - 1] >> (64 - bits) != 0 {
-            overflow = true;
+        let word_bits = 64;
+        let mut r = Self::ZERO;
+        let mut carry = 0;
+        for i in 0..Self::LIMBS - limbs {
+            let x = self.limbs[i];
+            r.limbs[i + limbs] = (x << bits) | carry;
+            // Shift in two steps: `bits` may be 0 and `x >> 64` would overflow.
+            carry = (x >> (word_bits - bits - 1)) >> 1;
         }
-        if self.limbs[LIMBS - limbs - 1] << bits > Self::MASK {
-            overflow = true;
-        }
-
-        // Shift
-        for i in (limbs + 1..LIMBS).rev() {
-            assume!(i - limbs < LIMBS && i - limbs - 1 < LIMBS);
-            self.limbs[i] = self.limbs[i - limbs] << bits;
-            self.limbs[i] |= self.limbs[i - limbs - 1] >> (64 - bits);
-        }
-        self.limbs[limbs] = self.limbs[0] << bits;
-        self.limbs[..limbs].fill(0);
-        self.limbs[LIMBS - 1] &= Self::MASK;
-        (self, overflow)
+        // Overflow means a set bit was shifted out, as in the early return above.
+        // Besides the final carry this covers the bits of the top limb that lie
+        // above `MASK` and the high limbs that are dropped entirely.
+        let overflow = carry != 0
+            || r.limbs[LIMBS - 1] > Self::MASK
+            || self.limbs[LIMBS - limbs..].iter().any(|&limb| limb != 0);
+        r.limbs[LIMBS - 1] &= Self::MASK;
+        (r, overflow)
     }
 
     /// Left shift by `rhs` bits.
@@ -349,38 +325,25 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     /// the shift is larger than `BITS` (which is IMHO not very useful).
     #[inline]
     #[must_use]
-    pub fn overflowing_shr(mut self, rhs: usize) -> (Self, bool) {
+    pub fn overflowing_shr(self, rhs: usize) -> (Self, bool) {
         let (limbs, bits) = (rhs / 64, rhs % 64);
         if limbs >= LIMBS {
             return (Self::ZERO, self != Self::ZERO);
         }
-        if bits == 0 {
-            // Check for overflow
-            let mut overflow = false;
-            for i in 0..limbs {
-                overflow |= self.limbs[i] != 0;
-            }
-
-            // Shift
-            for i in 0..(LIMBS - limbs) {
-                self.limbs[i] = self.limbs[i + limbs];
-            }
-            self.limbs[LIMBS - limbs..].fill(0);
-            return (self, overflow);
-        }
-
-        // Check for overflow
-        let overflow = self.limbs[LIMBS - limbs - 1] >> (bits - 1) & 1 != 0;
 
-        // Shift
-        for i in 0..(LIMBS - limbs - 1) {
-            assume!(i + limbs < LIMBS && i + limbs + 1 < LIMBS);
-            self.limbs[i] = self.limbs[i + limbs] >> bits;
-            self.limbs[i] |= self.limbs[i + limbs + 1] << (64 - bits);
+        let word_bits = 64;
+        let mut r = Self::ZERO;
+        let mut carry = 0;
+        for i in 0..LIMBS - limbs {
+            let x = self.limbs[LIMBS - 1 - i];
+            r.limbs[LIMBS - 1 - i - limbs] = (x >> bits) | carry;
+            // Shift in two steps: `bits` may be 0 and `x << 64` would overflow.
+            carry = (x << (word_bits - bits - 1)) << 1;
         }
-        self.limbs[LIMBS - limbs - 1] = self.limbs[LIMBS - 1] >> bits;
-        self.limbs[LIMBS - limbs..].fill(0);
-        (self, overflow)
+        // Overflow means a set bit was shifted out, as in the early return above:
+        // either the final carry or any of the low limbs that are dropped entirely.
+        let overflow = carry != 0 || self.limbs[..limbs].iter().any(|&limb| limb != 0);
+        (r, overflow)
     }
 
     /// Right shift by `rhs` bits.
diff --git a/src/div.rs b/src/div.rs
index ad6b757e..6f5ecc8c 100644
--- a/src/div.rs
+++ b/src/div.rs
@@ -7,7 +7,7 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     #[must_use]
     #[allow(clippy::missing_const_for_fn)] // False positive
     pub fn checked_div(self, rhs: Self) -> Option<Self> {
-        if rhs == Self::ZERO {
+        if rhs.is_zero() {
             return None;
         }
         Some(self.div(rhs))
@@ -18,7 +18,7 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     #[must_use]
     #[allow(clippy::missing_const_for_fn)] // False positive
     pub fn checked_rem(self, rhs: Self) -> Option<Self> {
-        if rhs == Self::ZERO {
+        if rhs.is_zero() {
             return None;
         }
         Some(self.rem(rhs))
@@ -33,9 +33,8 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     #[must_use]
     #[track_caller]
     pub fn div_ceil(self, rhs: Self) -> Self {
-        assert!(rhs != Self::ZERO, "Division by zero");
         let (q, r) = self.div_rem(rhs);
-        if r == Self::ZERO {
+        if r.is_zero() {
             q
         } else {
             q + Self::from(1)
@@ -51,7 +50,6 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     #[must_use]
     #[track_caller]
     pub fn div_rem(mut self, mut rhs: Self) -> (Self, Self) {
-        assert!(rhs != Self::ZERO, "Division by zero");
         algorithms::div(&mut self.limbs, &mut rhs.limbs);
         (self, rhs)
     }
diff --git a/src/log.rs b/src/log.rs
index 84e0fa5e..3c0bb685 100644
--- a/src/log.rs
+++ b/src/log.rs
@@ -9,7 +9,7 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     #[inline]
     #[must_use]
     pub fn checked_log(self, base: Self) -> Option<usize> {
-        if base < Self::from(2) || self == Self::ZERO {
+        if base < Self::from(2) || self.is_zero() {
             return None;
         }
         Some(self.log(base))
diff --git a/src/macros.rs b/src/macros.rs
index 8c38dafa..92898c2f 100644
--- a/src/macros.rs
+++ b/src/macros.rs
@@ -75,6 +75,7 @@ macro_rules! impl_bin_op {
     };
 }
 
+#[allow(unused)]
 macro_rules! assume {
     ($e:expr $(,)?) => {
         if !$e {
@@ -89,6 +90,7 @@ macro_rules! assume {
     };
 }
 
+#[allow(unused)]
 macro_rules! debug_unreachable {
     ($($t:tt)*) => {
         if cfg!(debug_assertions) {
diff --git a/src/modular.rs b/src/modular.rs
index 1f94955b..30335a26 100644
--- a/src/modular.rs
+++ b/src/modular.rs
@@ -17,7 +17,7 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     #[inline]
     #[must_use]
     pub fn reduce_mod(mut self, modulus: Self) -> Self {
-        if modulus == Self::ZERO {
+        if modulus.is_zero() {
             return Self::ZERO;
         }
         if self >= modulus {
@@ -53,7 +53,7 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     #[inline]
     #[must_use]
     pub fn mul_mod(self, rhs: Self, mut modulus: Self) -> Self {
-        if modulus == Self::ZERO {
+        if modulus.is_zero() {
             return Self::ZERO;
         }
 
@@ -84,7 +84,7 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     #[inline]
     #[must_use]
     pub fn pow_mod(mut self, mut exp: Self, modulus: Self) -> Self {
-        if modulus == Self::ZERO || modulus <= Self::from(1) {
+        if modulus.is_zero() || modulus <= Self::from(1) {
             // Also covers Self::BITS == 0
             return Self::ZERO;
         }
diff --git a/src/root.rs b/src/root.rs
index 695218a3..d99b7274 100644
--- a/src/root.rs
+++ b/src/root.rs
@@ -31,7 +31,7 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
         assert!(degree > 0, "degree must be greater than zero");
 
         // Handle zero case (including BITS == 0).
-        if self == Self::ZERO {
+        if self.is_zero() {
             return Self::ZERO;
         }
 
diff --git a/src/special.rs b/src/special.rs
index cda96890..11fa8b93 100644
--- a/src/special.rs
+++ b/src/special.rs
@@ -109,11 +109,11 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     #[inline]
     #[must_use]
     pub fn checked_next_multiple_of(self, rhs: Self) -> Option<Self> {
-        if rhs == Self::ZERO {
+        if rhs.is_zero() {
             return None;
         }
         let (q, r) = self.div_rem(rhs);
-        if r == Self::ZERO {
+        if r.is_zero() {
             return Some(self);
         }
         let q = q.checked_add(Self::from(1))?;
diff --git a/src/support/serde.rs b/src/support/serde.rs
index cd6ba83b..c06160c9 100644
--- a/src/support/serde.rs
+++ b/src/support/serde.rs
@@ -39,29 +39,11 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     }
 
     fn serialize_human_minimal<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
-        if BITS == 0 {
+        if self.is_zero() {
             return s.serialize_str(ZERO_STR);
         }
 
-        let le_bytes = self.as_le_bytes();
-        let mut bytes = le_bytes.iter().rev().skip_while(|b| **b == 0);
-
-        // We avoid String allocation if there is no non-0 byte
-        // If there is a first byte, we allocate a string, and write the prefix
-        // and first byte to it
-        let mut result = match bytes.next() {
-            Some(b) => {
-                let mut result = String::with_capacity(2 + nbytes(BITS) * 2);
-                write!(result, "0x{b:x}").unwrap();
-                result
-            }
-            None => return s.serialize_str(ZERO_STR),
-        };
-        bytes
-            .try_for_each(|byte| write!(result, "{byte:02x}"))
-            .unwrap();
-
-        s.serialize_str(&result)
+        s.collect_str(&format_args!("{self:#x}")) // Lets the serializer skip a temporary `String`.
     }
 
     fn serialize_binary<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
@@ -83,7 +65,8 @@ impl<const BITS: usize, const LIMBS: usize> Serialize for Uint<BITS, LIMBS> {
     }
 }
 
-/// Deserialize human readable hex strings or byte arrays into hashes.
+/// Deserialize human readable hex strings or byte arrays into [`Uint`].
+///
 /// Hex strings can be upper/lower/mixed case, have an optional `0x` prefix, and
 /// can be any length. They are interpreted big-endian.
 impl<'de, const BITS: usize, const LIMBS: usize> Deserialize<'de> for Uint<BITS, LIMBS> {