From a53448015b450c8fa31a4bbed70c86fae914a384 Mon Sep 17 00:00:00 2001 From: Gustavo Noronha Silva Date: Tue, 24 Sep 2024 22:31:35 -0300 Subject: [PATCH] cmp: completely avoid Rust fmt in verbose mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes the code less readable, but gets us a massive improvement to performance. Comparing two ~36MB completely different files now takes ~40% of the time. Compared to GNU cmp, we now run the same comparison in ~26% of the time. This also improves comparing binary files. A comparison of chromium and libxul now takes ~60% of the time. We also beat GNU cmp by about the same margin. Before: > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l huge huge.3' Benchmark 1: ../target/release/diffutils cmp -l huge huge.3 Time (mean ± σ): 2.000 s ± 0.016 s [User: 1.603 s, System: 0.392 s] Range (min … max): 1.989 s … 2.043 s 10 runs Warning: Ignoring non-zero exit code. > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l -b \ /usr/lib64/chromium-browser/chromium-browser \ /usr/lib64/firefox/libxul.so' Benchmark 1: ../target/release/diffutils cmp -l -b /usr/lib64/chromium-browser/chromium-browser /usr/lib64/firefox/libxul.so Time (mean ± σ): 24.704 s ± 0.162 s [User: 21.948 s, System: 2.700 s] Range (min … max): 24.359 s … 24.889 s 10 runs Warning: Ignoring non-zero exit code. After: > hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l huge huge.3' Benchmark 1: ../target/release/diffutils cmp -l huge huge.3 Time (mean ± σ): 849.5 ms ± 6.2 ms [User: 538.3 ms, System: 306.8 ms] Range (min … max): 839.4 ms … 857.7 ms 10 runs Warning: Ignoring non-zero exit code. 
> hyperfine --warmup 1 -i --output=pipe \ '../target/release/diffutils cmp -l -b \ /usr/lib64/chromium-browser/chromium-browser \ /usr/lib64/firefox/libxul.so' Benchmark 1: ../target/release/diffutils cmp -l -b /usr/lib64/chromium-browser/chromium-browser /usr/lib64/firefox/libxul.so Time (mean ± σ): 14.646 s ± 0.040 s [User: 12.328 s, System: 2.286 s] Range (min … max): 14.585 s … 14.702 s 10 runs Warning: Ignoring non-zero exit code. --- src/cmp.rs | 84 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 19 deletions(-) diff --git a/src/cmp.rs b/src/cmp.rs index f7fc8d6..4b646fe 100644 --- a/src/cmp.rs +++ b/src/cmp.rs @@ -529,6 +529,9 @@ fn format_byte(byte: u8) -> String { unsafe { String::from_utf8_unchecked(quoted) } } +// This function has been optimized to not use the Rust fmt system, which +// leads to a massive speed up when processing large files: cuts the time +// for comparing 2 ~36MB completely different files in half on an M1 Max. fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result<(), String> { assert!(!params.quiet); @@ -541,19 +544,49 @@ fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result< let mut from_oct = [0u8; 3]; // for octal conversions let mut to_oct = [0u8; 3]; + // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 4-byte value + up to 2 byte value + 4 spaces + let mut output = Vec::::with_capacity(width + 3 * 2 + 4 + 2 + 4); + if params.print_bytes { for (at_byte, from_byte, to_byte) in diffs { + output.clear(); + + // "{:>width$} {:>3o} {:4} {:>3o} {}", let at_byte_str = at_byte_buf.format(at_byte); - writeln!( - stdout, - "{:>width$} {} {:4} {} {}", - at_byte_str, - format_octal(from_byte, &mut from_oct), - format_byte(from_byte), - format_octal(to_byte, &mut to_oct), - format_byte(to_byte), - ) - .map_err(|e| { + let at_byte_padding = width - at_byte_str.len(); + + for _ in 0..at_byte_padding { + output.push(b' ') + } + + 
output.extend_from_slice(at_byte_str.as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + + output.push(b' '); + + let from_byte_str = format_byte(from_byte); + let from_byte_padding = 4 - from_byte_str.len(); + + output.extend_from_slice(from_byte_str.as_bytes()); + + for _ in 0..from_byte_padding { + output.push(b' ') + } + + output.push(b' '); + + output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_byte(to_byte).as_bytes()); + + output.push(b'\n'); + + stdout.write_all(output.as_slice()).map_err(|e| { format!( "{}: error printing output: {e}", params.executable.to_string_lossy() @@ -562,16 +595,29 @@ fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result< } } else { for (at_byte, from_byte, to_byte) in diffs { + output.clear(); + + // "{:>width$} {:>3o} {:>3o}" let at_byte_str = at_byte_buf.format(at_byte); - writeln!( - stdout, - "{:>width$} {} {}", - at_byte_str, - format_octal(from_byte, &mut from_oct), - format_octal(to_byte, &mut to_oct), - width = width - ) - .map_err(|e| { + let at_byte_padding = width - at_byte_str.len(); + + for _ in 0..at_byte_padding { + output.push(b' ') + } + + output.extend_from_slice(at_byte_str.as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + + output.push(b'\n'); + + stdout.write_all(output.as_slice()).map_err(|e| { format!( "{}: error printing output: {e}", params.executable.to_string_lossy()