diff --git a/src/uucore/src/lib/features/encoding.rs b/src/uucore/src/lib/features/encoding.rs index 90a5e9ba8d8..6a2dccd4f18 100644 --- a/src/uucore/src/lib/features/encoding.rs +++ b/src/uucore/src/lib/features/encoding.rs @@ -266,36 +266,50 @@ impl SupportsFastDecodeAndEncode for Base58Wrapper { return Ok(()); } - // Convert bytes to big integer - let mut num: Vec<u32> = Vec::new(); + // Convert bytes to big integer (Vec<u32> in little-endian format) + let mut num = Vec::with_capacity(input_trimmed.len().div_ceil(4) + 1); for &byte in input_trimmed { - let mut carry = byte as u32; + let mut carry = byte as u64; for n in &mut num { - let tmp = (*n as u64) * 256 + carry as u64; + let tmp = (*n as u64) * 256 + carry; *n = tmp as u32; - carry = (tmp >> 32) as u32; + carry = tmp >> 32; } if carry > 0 { - num.push(carry); + num.push(carry as u32); } } // Convert to base58 - let mut result = Vec::new(); + let mut result = Vec::with_capacity((input_trimmed.len() * 138 / 100) + 1); let alphabet = self.alphabet(); - while !num.is_empty() && num.iter().any(|&n| n != 0) { + // Optimized check: stop when all elements are zero + while !num.is_empty() { + // Check if we're done (all zeros) + let mut all_zero = true; let mut carry = 0u64; + for n in num.iter_mut().rev() { let tmp = carry * (1u64 << 32) + *n as u64; *n = (tmp / 58) as u32; carry = tmp % 58; + if *n != 0 { + all_zero = false; + } } + result.push(alphabet[carry as usize]); - // Remove leading zeros - while num.last() == Some(&0) && num.len() > 1 { - num.pop(); + if all_zero { + break; + } + + // Trim trailing zeros less frequently + if num.len() > 1 && result.len() % 8 == 0 { + while num.last() == Some(&0) && num.len() > 1 { + num.pop(); + } } } @@ -305,7 +319,7 @@ impl SupportsFastDecodeAndEncode for Base58Wrapper { } // Add result (reversed because we built it backwards) - for byte in result.into_iter().rev() { + for &byte in result.iter().rev() { output.push_back(byte); } @@ -313,7 +327,10 @@ impl 
SupportsFastDecodeAndEncode for Base58Wrapper { } fn unpadded_multiple(&self) -> usize { - 1 // Base58 doesn't use padding + // Base58 must encode the entire input as one big integer, not in chunks + // Use a very large value to effectively disable chunking, but avoid overflow + // when multiplied by ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE (1024) in base_common + usize::MAX / 2048 } fn valid_decoding_multiple(&self) -> usize { diff --git a/tests/by-util/test_basenc.rs b/tests/by-util/test_basenc.rs index 0a6a6ddc1d6..f02de772b20 100644 --- a/tests/by-util/test_basenc.rs +++ b/tests/by-util/test_basenc.rs @@ -211,6 +211,31 @@ fn test_base58_decode() { .stdout_only("Hello, World!"); } +#[test] +fn test_base58_large_file_no_chunking() { + // Regression test: base58 must process entire input as one big integer, + // not in 1024-byte chunks. This test ensures files >1024 bytes work correctly. + let (at, mut ucmd) = at_and_ucmd!(); + let filename = "large_file.txt"; + + // spell-checker:disable + let input = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(50); + // spell-checker:enable + at.write(filename, &input); + + let result = ucmd.arg("--base58").arg(filename).succeeds(); + let encoded = result.stdout_str(); + + // Verify the output ends with the expected suffix (matches GNU basenc output) + // spell-checker:disable + assert!( + encoded + .trim_end() + .ends_with("ZNRRacEnhrY83ZEYkpwWVZNFK5DFRasr\nw693NsNGtiQ9fYAj") + ); + // spell-checker:enable +} + #[test] fn test_choose_last_encoding_base64() { new_ucmd!()