diff --git a/Cargo.lock b/Cargo.lock
index 0f83aa0a645..b39080f5ab9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4242,6 +4242,7 @@ dependencies = [
  "codspeed-divan-compat",
  "fluent",
  "libc",
+ "memchr",
  "nix 0.30.1",
  "tempfile",
  "thiserror 2.0.16",
diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock
index 603c350b1f9..33316dcda88 100644
--- a/fuzz/Cargo.lock
+++ b/fuzz/Cargo.lock
@@ -1631,6 +1631,7 @@ dependencies = [
  "clap",
  "fluent",
  "libc",
+ "memchr",
  "nix",
  "thiserror",
  "unicode-width",
diff --git a/src/uu/wc/Cargo.toml b/src/uu/wc/Cargo.toml
index 144fcd083ad..afee8082648 100644
--- a/src/uu/wc/Cargo.toml
+++ b/src/uu/wc/Cargo.toml
@@ -21,6 +21,7 @@ path = "src/wc.rs"
 clap = { workspace = true }
 uucore = { workspace = true, features = ["parser", "pipes", "quoting-style"] }
 bytecount = { workspace = true, features = ["runtime-dispatch-simd"] }
+memchr = { workspace = true }
 thiserror = { workspace = true }
 unicode-width = { workspace = true }
 fluent = { workspace = true }
diff --git a/src/uu/wc/src/count_fast.rs b/src/uu/wc/src/count_fast.rs
index 9a473401e24..8ab931b020e 100644
--- a/src/uu/wc/src/count_fast.rs
+++ b/src/uu/wc/src/count_fast.rs
@@ -229,6 +229,76 @@ pub(crate) fn count_bytes_chars_and_lines_fast<
     const COUNT_LINES: bool,
 >(
     handle: &mut R,
+) -> (WordCount, Option<io::Error>) {
+    // The tuple is built from const generics only, so each instantiation
+    // always takes the same arm.
+    match (COUNT_BYTES, COUNT_CHARS, COUNT_LINES) {
+        // Lines only: count newlines with memchr.
+        (false, false, true) => count_lines_only_fast(handle),
+        // Bytes and lines: one pass over each chunk, newlines via bytecount.
+        (true, false, true) => count_bytes_and_lines_fast(handle),
+        // Every other flag combination keeps the generic implementation.
+        _ => {
+            count_bytes_chars_and_lines_generic::<R, COUNT_BYTES, COUNT_CHARS, COUNT_LINES>(handle)
+        }
+    }
+}
+
+/// Counts newline bytes (`b'\n'`) read from `handle`, using `memchr`.
+///
+/// Reads until `read` returns `Ok(0)`, retrying reads that fail with
+/// `ErrorKind::Interrupted`. Only `lines` of the returned [`WordCount`] is
+/// updated. On any other read error, returns the count accumulated so far
+/// together with that error.
+fn count_lines_only_fast<R: Read>(handle: &mut R) -> (WordCount, Option<io::Error>) {
+    let mut total = WordCount::default();
+    let buf: &mut [u8] = &mut AlignedBuffer::default().data;
+
+    loop {
+        match handle.read(buf) {
+            Ok(0) => return (total, None),
+            Ok(n) => {
+                total.lines += memchr::memchr_iter(b'\n', &buf[..n]).count();
+            }
+            // An interrupted read is retried on the next loop iteration.
+            Err(ref e) if e.kind() == ErrorKind::Interrupted => (),
+            Err(e) => return (total, Some(e)),
+        }
+    }
+}
+
+/// Counts bytes and newline bytes (`b'\n'`) read from `handle`, using `bytecount`.
+///
+/// Reads until `read` returns `Ok(0)`, retrying reads that fail with
+/// `ErrorKind::Interrupted`. Only `bytes` and `lines` of the returned
+/// [`WordCount`] are updated. On any other read error, returns the counts
+/// accumulated so far together with that error.
+fn count_bytes_and_lines_fast<R: Read>(handle: &mut R) -> (WordCount, Option<io::Error>) {
+    let mut total = WordCount::default();
+    let buf: &mut [u8] = &mut AlignedBuffer::default().data;
+
+    loop {
+        match handle.read(buf) {
+            Ok(0) => return (total, None),
+            Ok(n) => {
+                total.bytes += n;
+                total.lines += bytecount::count(&buf[..n], b'\n');
+            }
+            // An interrupted read is retried on the next loop iteration.
+            Err(ref e) if e.kind() == ErrorKind::Interrupted => (),
+            Err(e) => return (total, Some(e)),
+        }
+    }
+}
+
+/// Generic implementation, used for flag combinations without a specialized fast path.
+fn count_bytes_chars_and_lines_generic<
+    R: Read,
+    const COUNT_BYTES: bool,
+    const COUNT_CHARS: bool,
+    const COUNT_LINES: bool,
+>(
+    handle: &mut R,
 ) -> (WordCount, Option<io::Error>) {
     let mut total = WordCount::default();
     let buf: &mut [u8] = &mut AlignedBuffer::default().data;