Skip to content

Commit

Permalink
cmp: avoid using advanced rust formatting for -l
Browse files Browse the repository at this point in the history
Octal conversion and simple integer to string both show up in profiling.
This change improves comparing ~36M completely different files with both
-l and -b by ~11-13%.
  • Loading branch information
kov committed Oct 1, 2024
1 parent 5005741 commit 2e68130
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 8 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ path = "src/main.rs"
[dependencies]
chrono = "0.4.38"
diff = "0.1.13"
itoa = "1.0.11"
regex = "1.10.4"
same-file = "1.0.6"
unicode-width = "0.2.0"
Expand Down
40 changes: 32 additions & 8 deletions src/cmp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,24 @@ fn is_ascii_printable(byte: u8) -> bool {
c.is_ascii() && !c.is_ascii_control()
}

/// Renders `byte` as octal text, right-aligned and space-padded in a
/// three-character field (the same output as the `{:>3o}` format spec).
///
/// The digits are written into the caller-supplied `buf`, and the returned
/// string slice borrows from it, so no allocation takes place.
#[inline]
fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str {
    // Representation of zero; larger values overwrite it from the right.
    *buf = *b"  0";

    let mut remaining = byte;
    // Fill least-significant digit first, walking the buffer back to front.
    // A u8 never needs more than three octal digits, so the buffer suffices.
    for slot in buf.iter_mut().rev() {
        if remaining == 0 {
            break;
        }
        *slot = b'0' + (remaining & 0o7);
        remaining >>= 3;
    }

    // SAFETY: every byte in `buf` is either an ASCII space or an ASCII digit
    // in '0'..='7', so the buffer is always valid UTF-8.
    unsafe { std::str::from_utf8_unchecked(&buf[..]) }
}

#[inline]
fn format_byte(byte: u8) -> String {
let mut byte = byte;
Expand Down Expand Up @@ -520,15 +538,20 @@ fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result<
// Obtain the width of the first column from the last byte offset.
let width = format!("{}", offset).len();

let mut at_byte_buf = itoa::Buffer::new();
let mut from_oct = [0u8; 3]; // for octal conversions
let mut to_oct = [0u8; 3];

if params.print_bytes {
for (at_byte, from_byte, to_byte) in diffs {
let at_byte_str = at_byte_buf.format(at_byte);
writeln!(
stdout,
"{:>width$} {:>3o} {:4} {:>3o} {}",
at_byte,
from_byte,
"{:>width$} {} {:4} {} {}",
at_byte_str,
format_octal(from_byte, &mut from_oct),
format_byte(from_byte),
to_byte,
format_octal(to_byte, &mut to_oct),
format_byte(to_byte),
)
.map_err(|e| {
Expand All @@ -540,12 +563,13 @@ fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result<
}
} else {
for (at_byte, from_byte, to_byte) in diffs {
let at_byte_str = at_byte_buf.format(at_byte);
writeln!(
stdout,
"{:>width$} {:>3o} {:>3o}",
at_byte,
from_byte,
to_byte,
"{:>width$} {} {}",
at_byte_str,
format_octal(from_byte, &mut from_oct),
format_octal(to_byte, &mut to_oct),
width = width
)
.map_err(|e| {
Expand Down

0 comments on commit 2e68130

Please sign in to comment.