diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 589b952..9ad1c17 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -41,6 +41,8 @@ jobs: strategy: matrix: test-target: + - { name: fuzz_cmp, should_pass: true } + - { name: fuzz_cmp_args, should_pass: true } - { name: fuzz_ed, should_pass: true } - { name: fuzz_normal, should_pass: true } - { name: fuzz_patch, should_pass: true } diff --git a/Cargo.lock b/Cargo.lock index 3330d21..fe461de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -128,6 +128,7 @@ dependencies = [ "assert_cmd", "chrono", "diff", + "itoa", "predicates", "pretty_assertions", "regex", @@ -190,6 +191,12 @@ dependencies = [ "cc", ] +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + [[package]] name = "js-sys" version = "0.3.69" diff --git a/Cargo.toml b/Cargo.toml index 477467c..6fa1a3c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ path = "src/main.rs" [dependencies] chrono = "0.4.38" diff = "0.1.13" +itoa = "1.0.11" regex = "1.10.4" same-file = "1.0.6" unicode-width = "0.2.0" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 5debf47..8b0b521 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -16,6 +16,18 @@ diffutils = { path = "../" } [workspace] members = ["."] +[[bin]] +name = "fuzz_cmp" +path = "fuzz_targets/fuzz_cmp.rs" +test = false +doc = false + +[[bin]] +name = "fuzz_cmp_args" +path = "fuzz_targets/fuzz_cmp_args.rs" +test = false +doc = false + [[bin]] name = "fuzz_patch" path = "fuzz_targets/fuzz_patch.rs" diff --git a/fuzz/dictionaries/cmp.txt b/fuzz/dictionaries/cmp.txt new file mode 100644 index 0000000..0365fef --- /dev/null +++ b/fuzz/dictionaries/cmp.txt @@ -0,0 +1,36 @@ +"-l" +"--verbose" +"-b" +"--print-bytes" +"-lb" +"-bl" +"-n" +"--bytes" +"--bytes=" +"--bytes=1024" +"--bytes=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" +"-i" +"--ignore-initial" +"--ignore-initial=" +"--ignore-initial=1024" +"--ignore-initial=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999:9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" +"-s" +"-q" +"--quiet" +"--silent" +"-" +"--" +"1kB" +"1G" +"1GB" +"1T" +"1TB" +"1P" +"1PB" +"1Z" +"1ZB" +"1Y" +"1YB" +"1Y" +"0" +"1:2" diff --git a/fuzz/fuzz_targets/fuzz_cmp.rs b/fuzz/fuzz_targets/fuzz_cmp.rs new file mode 100644 index 0000000..e9d0e4c --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_cmp.rs @@ -0,0 +1,51 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +use diffutilslib::cmp::{self, Cmp}; + +use std::ffi::OsString; +use std::fs::File; +use std::io::Write; + +fn os(s: &str) -> OsString { + OsString::from(s) +} + +fuzz_target!(|x: (Vec, Vec)| { + let args = vec!["cmp", "-l", "-b", "target/fuzz.cmp.a", "target/fuzz.cmp.b"] + .into_iter() + .map(|s| os(s)) + .peekable(); + + let (from, to) = x; + + File::create("target/fuzz.cmp.a") + .unwrap() + .write_all(&from) + .unwrap(); + + File::create("target/fuzz.cmp.b") + .unwrap() + .write_all(&to) + .unwrap(); + + let params = + cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); + let ret = cmp::cmp(¶ms); + if from == to && !matches!(ret, Ok(Cmp::Equal)) { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b are equal, but cmp returned {:?}.", + ret + ); + } else if from != to && !matches!(ret, Ok(Cmp::Different)) { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b are different, but cmp returned {:?}.", + ret + ); + } else if ret.is_err() { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b caused cmp to error ({:?}).", + ret + ); + } +}); diff --git a/fuzz/fuzz_targets/fuzz_cmp_args.rs b/fuzz/fuzz_targets/fuzz_cmp_args.rs new file mode 100644 index 0000000..579cf34 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_cmp_args.rs @@ -0,0 +1,23 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +use diffutilslib::cmp; + +use libfuzzer_sys::Corpus; +use std::ffi::OsString; + +fn os(s: &str) -> OsString { + OsString::from(s) +} + +fuzz_target!(|x: Vec| -> Corpus { + if x.len() > 6 { + // Make sure we try to parse an option when we get longer args. x[0] will be + // the executable name. + if ![os("-l"), os("-b"), os("-s"), os("-n"), os("-i")].contains(&x[1]) { + return Corpus::Reject; + } + } + let _ = cmp::parse_params(x.into_iter().peekable()); + Corpus::Keep +}); diff --git a/src/cmp.rs b/src/cmp.rs new file mode 100644 index 0000000..1d9ca9e --- /dev/null +++ b/src/cmp.rs @@ -0,0 +1,1185 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +use crate::utils::format_failure_to_read_input_file; +use std::env::{self, ArgsOs}; +use std::ffi::OsString; +use std::io::{BufRead, BufReader, BufWriter, Read, Write}; +use std::iter::Peekable; +use std::process::ExitCode; +use std::{fs, io}; + +#[cfg(not(target_os = "windows"))] +use std::os::fd::{AsRawFd, FromRawFd}; + +#[cfg(not(target_os = "windows"))] +use std::os::unix::fs::MetadataExt; + +#[cfg(target_os = "windows")] +use std::os::windows::fs::MetadataExt; + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct Params { + executable: OsString, + from: OsString, + to: OsString, + print_bytes: bool, + skip_a: Option, + skip_b: Option, + max_bytes: Option, + verbose: bool, + quiet: bool, +} + +#[inline] +fn usage_string(executable: &str) -> String { + format!("Usage: {} ", executable) +} + +#[cfg(not(target_os = "windows"))] +fn is_stdout_dev_null() -> bool { + let Ok(dev_null) = fs::metadata("/dev/null") else { + return false; + }; + + let stdout_fd = io::stdout().lock().as_raw_fd(); + + // SAFETY: we have exclusive access to stdout right now. + let stdout_file = unsafe { fs::File::from_raw_fd(stdout_fd) }; + let Ok(stdout) = stdout_file.metadata() else { + return false; + }; + + let is_dev_null = stdout.dev() == dev_null.dev() && stdout.ino() == dev_null.ino(); + + // Don't let File close the fd. It's unfortunate that File doesn't have a leak_fd(). + std::mem::forget(stdout_file); + + is_dev_null +} + +pub fn parse_params>(mut opts: Peekable) -> Result { + let Some(executable) = opts.next() else { + return Err("Usage: ".to_string()); + }; + let executable_str = executable.to_string_lossy().to_string(); + + let parse_skip = |param: &str, skip_desc: &str| -> Result { + let suffix_start = param + .find(|b: char| !b.is_ascii_digit()) + .unwrap_or(param.len()); + let mut num = match param[..suffix_start].parse::() { + Ok(num) => num, + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, + Err(_) => { + return Err(format!( + "{}: invalid --ignore-initial value '{}'", + executable_str, skip_desc + )) + } + }; + + if suffix_start != param.len() { + // Note that GNU cmp advertises supporting up to Y, but fails if you try + // to actually use anything beyond E. + let multiplier: usize = match ¶m[suffix_start..] { + "kB" => 1_000, + "K" => 1_024, + "MB" => 1_000_000, + "M" => 1_048_576, + "GB" => 1_000_000_000, + "G" => 1_073_741_824, + "TB" => 1_000_000_000_000, + "T" => 1_099_511_627_776, + "PB" => 1_000_000_000_000_000, + "P" => 1_125_899_906_842_624, + "EB" => 1_000_000_000_000_000_000, + "E" => 1_152_921_504_606_846_976, + "ZB" => usize::MAX, // 1_000_000_000_000_000_000_000, + "Z" => usize::MAX, // 1_180_591_620_717_411_303_424, + "YB" => usize::MAX, // 1_000_000_000_000_000_000_000_000, + "Y" => usize::MAX, // 1_208_925_819_614_629_174_706_176, + _ => { + return Err(format!( + "{}: invalid --ignore-initial value '{}'", + executable_str, skip_desc + )); + } + }; + + num = match num.overflowing_mul(multiplier) { + (n, false) => n, + _ => usize::MAX, + } + } + + Ok(num) + }; + + let mut params = Params { + executable, + ..Default::default() + }; + let mut from = None; + let mut to = None; + let mut skip_pos1 = None; + let mut skip_pos2 = None; + while let Some(param) = opts.next() { + if param == "--" { + break; + } + if param == "-" { + if from.is_none() { + from = Some(param); + } else if to.is_none() { + to = Some(param); + } else { + return Err(usage_string(&executable_str)); + } + continue; + } + if param == "-b" || param == "--print-bytes" { + params.print_bytes = true; + continue; + } + if param == "-l" || param == "--verbose" { + params.verbose = true; + continue; + } + if param == "-lb" || param == "-bl" { + params.print_bytes = true; + params.verbose = true; + continue; + } + + let param_str = param.to_string_lossy().to_string(); + if param == "-n" || param_str.starts_with("--bytes=") { + let max_bytes = if param == "-n" { + opts.next() + .ok_or_else(|| usage_string(&executable_str))? + .to_string_lossy() + .to_string() + } else { + let (_, arg) = param_str.split_once('=').unwrap(); + arg.to_string() + }; + let max_bytes = match max_bytes.parse::() { + Ok(num) => num, + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, + Err(_) => { + return Err(format!( + "{}: invalid --bytes value '{}'", + executable_str, max_bytes + )) + } + }; + params.max_bytes = Some(max_bytes); + continue; + } + if param == "-i" || param_str.starts_with("--ignore-initial=") { + let skip_desc = if param == "-i" { + opts.next() + .ok_or_else(|| usage_string(&executable_str))? + .to_string_lossy() + .to_string() + } else { + let (_, arg) = param_str.split_once('=').unwrap(); + arg.to_string() + }; + let (skip_a, skip_b) = if let Some((skip_a, skip_b)) = skip_desc.split_once(':') { + ( + parse_skip(skip_a, &skip_desc)?, + parse_skip(skip_b, &skip_desc)?, + ) + } else { + let skip = parse_skip(&skip_desc, &skip_desc)?; + (skip, skip) + }; + params.skip_a = Some(skip_a); + params.skip_b = Some(skip_b); + continue; + } + if param == "-s" || param == "--quiet" || param == "--silent" { + params.quiet = true; + continue; + } + if param == "--help" { + println!("{}", usage_string(&executable_str)); + std::process::exit(0); + } + if param_str.starts_with('-') { + return Err(format!("Unknown option: {:?}", param)); + } + if from.is_none() { + from = Some(param); + } else if to.is_none() { + to = Some(param); + } else if skip_pos1.is_none() { + skip_pos1 = Some(parse_skip(¶m_str, ¶m_str)?); + } else if skip_pos2.is_none() { + skip_pos2 = Some(parse_skip(¶m_str, ¶m_str)?); + } else { + return Err(usage_string(&executable_str)); + } + } + + // Do as GNU cmp, and completely disable printing if we are + // outputing to /dev/null. + #[cfg(not(target_os = "windows"))] + if is_stdout_dev_null() { + params.quiet = true; + params.verbose = false; + params.print_bytes = false; + } + + if params.quiet && params.verbose { + return Err(format!( + "{}: options -l and -s are incompatible", + executable_str + )); + } + + params.from = if let Some(from) = from { + from + } else if let Some(param) = opts.next() { + param + } else { + return Err(usage_string(&executable_str)); + }; + params.to = if let Some(to) = to { + to + } else if let Some(param) = opts.next() { + param + } else { + OsString::from("-") + }; + + // GNU cmp ignores positional skip arguments if -i is provided. + if params.skip_a.is_none() { + if skip_pos1.is_some() { + params.skip_a = skip_pos1; + } else if let Some(param) = opts.next() { + let param_str = param.to_string_lossy().to_string(); + params.skip_a = Some(parse_skip(¶m_str, ¶m_str)?); + } + }; + if params.skip_b.is_none() { + if skip_pos2.is_some() { + params.skip_b = skip_pos2; + } else if let Some(param) = opts.next() { + let param_str = param.to_string_lossy().to_string(); + params.skip_b = Some(parse_skip(¶m_str, ¶m_str)?); + } + } + + Ok(params) +} + +fn prepare_reader( + path: &OsString, + skip: &Option, + params: &Params, +) -> Result, String> { + let mut reader: Box = if path == "-" { + Box::new(BufReader::new(io::stdin())) + } else { + match fs::File::open(path) { + Ok(file) => Box::new(BufReader::new(file)), + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + path, + &e, + )); + } + } + }; + + if let Some(skip) = skip { + if let Err(e) = io::copy(&mut reader.by_ref().take(*skip as u64), &mut io::sink()) { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + path, + &e, + )); + } + } + + Ok(reader) +} + +#[derive(Debug)] +pub enum Cmp { + Equal, + Different, +} + +pub fn cmp(params: &Params) -> Result { + let mut from = prepare_reader(¶ms.from, ¶ms.skip_a, params)?; + let mut to = prepare_reader(¶ms.to, ¶ms.skip_b, params)?; + + let mut at_byte = 1; + let mut at_line = 1; + let mut start_of_line = true; + let mut verbose_diffs = vec![]; + loop { + // Fill up our buffers. + let from_buf = match from.fill_buf() { + Ok(buf) => buf, + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + ¶ms.from, + &e, + )); + } + }; + + let to_buf = match to.fill_buf() { + Ok(buf) => buf, + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + ¶ms.to, + &e, + )); + } + }; + + // Check for EOF conditions. + if from_buf.is_empty() && to_buf.is_empty() { + break; + } + + if from_buf.is_empty() || to_buf.is_empty() { + let eof_on = if from_buf.is_empty() { + ¶ms.from.to_string_lossy() + } else { + ¶ms.to.to_string_lossy() + }; + + if params.verbose { + report_verbose_diffs(verbose_diffs, params)?; + } + + report_eof(at_byte, at_line, start_of_line, eof_on, params); + return Ok(Cmp::Different); + } + + // Fast path - for long files in which almost all bytes are the same we + // can do a direct comparison to let the compiler optimize. + let consumed = std::cmp::min(from_buf.len(), to_buf.len()); + if from_buf[..consumed] == to_buf[..consumed] { + let last = from_buf[..consumed].last().unwrap(); + + at_byte += consumed; + at_line += from_buf[..consumed].iter().filter(|&c| *c == b'\n').count(); + + start_of_line = *last == b'\n'; + + if let Some(max_bytes) = params.max_bytes { + if at_byte > max_bytes { + break; + } + } + + from.consume(consumed); + to.consume(consumed); + + continue; + } + + // Iterate over the buffers, the zip iterator will stop us as soon as the + // first one runs out. + for (&from_byte, &to_byte) in from_buf.iter().zip(to_buf.iter()) { + if from_byte != to_byte { + if params.verbose { + verbose_diffs.push((at_byte, from_byte, to_byte)); + } else { + report_difference(from_byte, to_byte, at_byte, at_line, params); + return Ok(Cmp::Different); + } + } + + start_of_line = from_byte == b'\n'; + if start_of_line { + at_line += 1; + } + + at_byte += 1; + + if let Some(max_bytes) = params.max_bytes { + if at_byte > max_bytes { + break; + } + } + } + + // Notify our readers about the bytes we went over. + from.consume(consumed); + to.consume(consumed); + } + + if params.verbose && !verbose_diffs.is_empty() { + report_verbose_diffs(verbose_diffs, params)?; + return Ok(Cmp::Different); + } + + Ok(Cmp::Equal) +} + +// Exit codes are documented at +// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-cmp.html +// An exit status of 0 means no differences were found, +// 1 means some differences were found, +// and 2 means trouble. +pub fn main(opts: Peekable) -> ExitCode { + let params = match parse_params(opts) { + Ok(param) => param, + Err(e) => { + eprintln!("{e}"); + return ExitCode::from(2); + } + }; + + if params.from == "-" && params.to == "-" + || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) + { + return ExitCode::SUCCESS; + } + + // If the files have different sizes, we already know they are not identical. If we have not + // been asked to show even the first difference, we can quit early. + if params.quiet { + if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(¶ms.from), fs::metadata(¶ms.to)) { + #[cfg(not(target_os = "windows"))] + if a_meta.size() != b_meta.size() { + return ExitCode::from(1); + } + #[cfg(target_os = "windows")] + if a_meta.file_size() != b_meta.file_size() { + return ExitCode::from(1); + } + } + } + + match cmp(¶ms) { + Ok(Cmp::Equal) => ExitCode::SUCCESS, + Ok(Cmp::Different) => ExitCode::from(1), + Err(e) => { + if !params.quiet { + eprintln!("{e}"); + } + ExitCode::from(2) + } + } +} + +#[inline] +fn is_ascii_printable(byte: u8) -> bool { + let c = byte as char; + c.is_ascii() && !c.is_ascii_control() +} + +#[inline] +fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str { + *buf = [b' ', b' ', b'0']; + + let mut num = byte; + let mut idx = 2; // Start at the last position in the buffer + + // Generate octal digits + while num > 0 { + buf[idx] = b'0' + num % 8; + num /= 8; + idx = idx.saturating_sub(1); + } + + // SAFETY: the operations we do above always land within ascii range. + unsafe { std::str::from_utf8_unchecked(&buf[..]) } +} + +#[inline] +fn format_byte(byte: u8) -> String { + let mut byte = byte; + let mut quoted = vec![]; + + if !is_ascii_printable(byte) { + if byte >= 128 { + quoted.push(b'M'); + quoted.push(b'-'); + byte -= 128; + } + + if byte < 32 { + quoted.push(b'^'); + byte += 64; + } else if byte == 127 { + quoted.push(b'^'); + byte = b'?'; + } + assert!((byte as char).is_ascii()); + } + + quoted.push(byte); + + // SAFETY: the checks and shifts we do above match what cat and GNU + // cmp do to ensure characters fall inside the ascii range. + unsafe { String::from_utf8_unchecked(quoted) } +} + +// This function has been optimized to not use the Rust fmt system, which +// leads to a massive speed up when processing large files: cuts the time +// for comparing 2 ~36MB completely different files in half on an M1 Max. +fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result<(), String> { + assert!(!params.quiet); + + let mut stdout = BufWriter::new(io::stdout().lock()); + if let Some((offset, _, _)) = diffs.last() { + // Obtain the width of the first column from the last byte offset. + let width = format!("{}", offset).len(); + + let mut at_byte_buf = itoa::Buffer::new(); + let mut from_oct = [0u8; 3]; // for octal conversions + let mut to_oct = [0u8; 3]; + + // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 4-byte value + up to 2 byte value + 4 spaces + let mut output = Vec::::with_capacity(width + 3 * 2 + 4 + 2 + 4); + + if params.print_bytes { + for (at_byte, from_byte, to_byte) in diffs { + output.clear(); + + // "{:>width$} {:>3o} {:4} {:>3o} {}", + let at_byte_str = at_byte_buf.format(at_byte); + let at_byte_padding = width - at_byte_str.len(); + + for _ in 0..at_byte_padding { + output.push(b' ') + } + + output.extend_from_slice(at_byte_str.as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + + output.push(b' '); + + let from_byte_str = format_byte(from_byte); + let from_byte_padding = 4 - from_byte_str.len(); + + output.extend_from_slice(from_byte_str.as_bytes()); + + for _ in 0..from_byte_padding { + output.push(b' ') + } + + output.push(b' '); + + output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_byte(to_byte).as_bytes()); + + output.push(b'\n'); + + stdout.write_all(output.as_slice()).map_err(|e| { + format!( + "{}: error printing output: {e}", + params.executable.to_string_lossy() + ) + })?; + } + } else { + for (at_byte, from_byte, to_byte) in diffs { + output.clear(); + + // "{:>width$} {:>3o} {:>3o}" + let at_byte_str = at_byte_buf.format(at_byte); + let at_byte_padding = width - at_byte_str.len(); + + for _ in 0..at_byte_padding { + output.push(b' ') + } + + output.extend_from_slice(at_byte_str.as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + + output.push(b'\n'); + + stdout.write_all(output.as_slice()).map_err(|e| { + format!( + "{}: error printing output: {e}", + params.executable.to_string_lossy() + ) + })?; + } + } + } + + Ok(()) +} + +#[inline] +fn report_eof(at_byte: usize, at_line: usize, start_of_line: bool, eof_on: &str, params: &Params) { + if params.quiet { + return; + } + + if at_byte == 1 { + eprintln!( + "{}: EOF on '{}' which is empty", + params.executable.to_string_lossy(), + eof_on + ); + } else if params.verbose { + eprintln!( + "{}: EOF on '{}' after byte {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + ); + } else if start_of_line { + eprintln!( + "{}: EOF on '{}' after byte {}, line {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + at_line - 1 + ); + } else { + eprintln!( + "{}: EOF on '{}' after byte {}, in line {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + at_line + ); + } +} + +fn is_posix_locale() -> bool { + let locale = if let Ok(locale) = env::var("LC_ALL") { + locale + } else if let Ok(locale) = env::var("LC_MESSAGES") { + locale + } else if let Ok(locale) = env::var("LANG") { + locale + } else { + "C".to_string() + }; + + locale == "C" || locale == "POSIX" +} + +#[inline] +fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, params: &Params) { + if params.quiet { + return; + } + + let term = if is_posix_locale() && !params.print_bytes { + "char" + } else { + "byte" + }; + print!( + "{} {} differ: {term} {}, line {}", + ¶ms.from.to_string_lossy(), + ¶ms.to.to_string_lossy(), + at_byte, + at_line + ); + if params.print_bytes { + let char_width = if to_byte >= 0x7F { 2 } else { 1 }; + print!( + " is {:>3o} {:char_width$} {:>3o} {:char_width$}", + from_byte, + format_byte(from_byte), + to_byte, + format_byte(to_byte) + ); + } + println!(); +} + +#[cfg(test)] +mod tests { + use super::*; + fn os(s: &str) -> OsString { + OsString::from(s) + } + + #[test] + fn positional() { + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + ..Default::default() + }), + parse_params([os("cmp"), os("foo"), os("bar")].iter().cloned().peekable()) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("-"), + ..Default::default() + }), + parse_params([os("cmp"), os("foo")].iter().cloned().peekable()) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("--help"), + ..Default::default() + }), + parse_params( + [os("cmp"), os("foo"), os("--"), os("--help")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: None, + ..Default::default() + }), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(usize::MAX), + ..Default::default() + }), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1"), os("2Y")] + .iter() + .cloned() + .peekable() + ) + ); + + // Bad positional arguments. + assert_eq!( + Err("Usage: cmp ".to_string()), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1"), os("2"), os("3")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("Usage: cmp ".to_string()), + parse_params([os("cmp")].iter().cloned().peekable()) + ); + } + + #[test] + fn execution_modes() { + let print_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + print_bytes: true, + ..Default::default() + }; + assert_eq!( + Ok(print_bytes.clone()), + parse_params( + [os("cmp"), os("-b"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(print_bytes), + parse_params( + [os("cmp"), os("--print-bytes"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + let verbose = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + verbose: true, + ..Default::default() + }; + assert_eq!( + Ok(verbose.clone()), + parse_params( + [os("cmp"), os("-l"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose), + parse_params( + [os("cmp"), os("--verbose"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + let verbose_and_print_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + print_bytes: true, + verbose: true, + ..Default::default() + }; + assert_eq!( + Ok(verbose_and_print_bytes.clone()), + parse_params( + [os("cmp"), os("-l"), os("-b"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose_and_print_bytes.clone()), + parse_params( + [os("cmp"), os("-lb"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose_and_print_bytes), + parse_params( + [os("cmp"), os("-bl"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + quiet: true, + ..Default::default() + }), + parse_params( + [os("cmp"), os("-s"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + // Some options do not mix. + assert_eq!( + Err("cmp: options -l and -s are incompatible".to_string()), + parse_params( + [os("cmp"), os("-l"), os("-s"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + } + + #[test] + fn max_bytes() { + let max_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + max_bytes: Some(1), + ..Default::default() + }; + assert_eq!( + Ok(max_bytes.clone()), + parse_params( + [os("cmp"), os("-n"), os("1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(max_bytes), + parse_params( + [os("cmp"), os("--bytes=1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + max_bytes: Some(usize::MAX), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("--bytes=99999999999999999999999999999999999999999999999999999999999"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // Failure case + assert_eq!( + Err("cmp: invalid --bytes value '1K'".to_string()), + parse_params( + [os("cmp"), os("--bytes=1K"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + } + + #[test] + fn skips() { + let skips = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(1), + ..Default::default() + }; + assert_eq!( + Ok(skips.clone()), + parse_params( + [os("cmp"), os("-i"), os("1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(skips), + parse_params( + [os("cmp"), os("--ignore-initial=1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(usize::MAX), + skip_b: Some(usize::MAX), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os("99999999999999999999999999999999999999999999999999999999999"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [os("cmp"), os("--ignore-initial=1:2"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1_000_000_000), + skip_b: Some(1_152_921_504_606_846_976 * 2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("--ignore-initial=1GB:2E"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // All special suffixes. + for (i, suffixes) in [ + ["kB", "K"], + ["MB", "M"], + ["GB", "G"], + ["TB", "T"], + ["PB", "P"], + ["EB", "E"], + ["ZB", "Z"], + ["YB", "Y"], + ] + .iter() + .enumerate() + { + let values = [ + 1_000usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), + 1024usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), + ]; + for (j, v) in values.iter().enumerate() { + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(*v), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os(&format!("1{}:2", suffixes[j])), + os("foo"), + os("bar"), + ] + .iter() + .cloned() + .peekable() + ) + ); + } + } + + // Ignores positional arguments when -i is provided. + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os("1:2"), + os("foo"), + os("bar"), + os("3"), + os("4") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // Failure cases + assert_eq!( + Err("cmp: invalid --ignore-initial value '1mb'".to_string()), + parse_params( + [os("cmp"), os("--ignore-initial=1mb"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("cmp: invalid --ignore-initial value '1:2:3'".to_string()), + parse_params( + [ + os("cmp"), + os("--ignore-initial=1:2:3"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("cmp: invalid --ignore-initial value '-1'".to_string()), + parse_params( + [os("cmp"), os("--ignore-initial=-1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + } +} diff --git a/src/diff.rs b/src/diff.rs new file mode 100644 index 0000000..f769a29 --- /dev/null +++ b/src/diff.rs @@ -0,0 +1,98 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +use crate::params::{parse_params, Format}; +use crate::utils::report_failure_to_read_input_file; +use crate::{context_diff, ed_diff, normal_diff, unified_diff}; +use std::env::ArgsOs; +use std::ffi::OsString; +use std::fs; +use std::io::{self, Read, Write}; +use std::iter::Peekable; +use std::process::{exit, ExitCode}; + +// Exit codes are documented at +// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html. +// An exit status of 0 means no differences were found, +// 1 means some differences were found, +// and 2 means trouble. +pub fn main(opts: Peekable) -> ExitCode { + let params = parse_params(opts).unwrap_or_else(|error| { + eprintln!("{error}"); + exit(2); + }); + // if from and to are the same file, no need to perform any comparison + let maybe_report_identical_files = || { + if params.report_identical_files { + println!( + "Files {} and {} are identical", + params.from.to_string_lossy(), + params.to.to_string_lossy(), + ); + } + }; + if params.from == "-" && params.to == "-" + || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) + { + maybe_report_identical_files(); + return ExitCode::SUCCESS; + } + + // read files + fn read_file_contents(filepath: &OsString) -> io::Result> { + if filepath == "-" { + let mut content = Vec::new(); + io::stdin().read_to_end(&mut content).and(Ok(content)) + } else { + fs::read(filepath) + } + } + let mut io_error = false; + let from_content = match read_file_contents(¶ms.from) { + Ok(from_content) => from_content, + Err(e) => { + report_failure_to_read_input_file(¶ms.executable, ¶ms.from, &e); + io_error = true; + vec![] + } + }; + let to_content = match read_file_contents(¶ms.to) { + Ok(to_content) => to_content, + Err(e) => { + report_failure_to_read_input_file(¶ms.executable, ¶ms.to, &e); + io_error = true; + vec![] + } + }; + if io_error { + return ExitCode::from(2); + } + + // run diff + let result: Vec = match params.format { + Format::Normal => normal_diff::diff(&from_content, &to_content, ¶ms), + Format::Unified => unified_diff::diff(&from_content, &to_content, ¶ms), + Format::Context => context_diff::diff(&from_content, &to_content, ¶ms), + Format::Ed => ed_diff::diff(&from_content, &to_content, ¶ms).unwrap_or_else(|error| { + eprintln!("{error}"); + exit(2); + }), + }; + if params.brief && !result.is_empty() { + println!( + "Files {} and {} differ", + params.from.to_string_lossy(), + params.to.to_string_lossy() + ); + } else { + io::stdout().write_all(&result).unwrap(); + } + if result.is_empty() { + maybe_report_identical_files(); + ExitCode::SUCCESS + } else { + ExitCode::from(1) + } +} diff --git a/src/lib.rs b/src/lib.rs index 0bb911b..a20ac56 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +pub mod cmp; pub mod context_diff; pub mod ed_diff; pub mod macros; diff --git a/src/main.rs b/src/main.rs index 7e221ea..8194d00 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,15 +3,17 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use crate::params::{parse_params, Format}; -use regex::Regex; -use std::env; -use std::ffi::OsString; -use std::fs; -use std::io::{self, Read, Write}; -use std::process::{exit, ExitCode}; +use std::{ + env::ArgsOs, + ffi::{OsStr, OsString}, + iter::Peekable, + path::{Path, PathBuf}, + process::ExitCode, +}; +mod cmp; mod context_diff; +mod diff; mod ed_diff; mod macros; mod normal_diff; @@ -19,103 +21,60 @@ mod params; mod unified_diff; mod utils; -fn report_failure_to_read_input_file( - executable: &OsString, - filepath: &OsString, - error: &std::io::Error, -) { - // std::io::Error's display trait outputs "{detail} (os error {code})" - // but we want only the {detail} (error string) part - let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap(); - eprintln!( - "{}: {}: {}", - executable.to_string_lossy(), - filepath.to_string_lossy(), - error_code_re.replace(error.to_string().as_str(), ""), - ); +/// # Panics +/// Panics if the binary path cannot be determined +fn binary_path(args: &mut Peekable) -> PathBuf { + match args.peek() { + Some(ref s) if !s.is_empty() => PathBuf::from(s), + _ => std::env::current_exe().unwrap(), + } +} + +/// #Panics +/// Panics if path has no UTF-8 valid name +fn name(binary_path: &Path) -> &OsStr { + binary_path.file_stem().unwrap() +} + +const VERSION: &str = env!("CARGO_PKG_VERSION"); + +fn usage(name: &str) { + println!("{name} {VERSION} (multi-call binary)\n"); + println!("Usage: {name} [function [arguments...]]\n"); + println!("Currently defined functions:\n"); + println!(" cmp, diff\n"); +} + +fn second_arg_error(name: &OsStr) -> ! { + eprintln!("Expected utility name as second argument, got nothing."); + usage(&name.to_string_lossy()); + std::process::exit(0); } -// Exit codes are documented at -// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html. -// An exit status of 0 means no differences were found, -// 1 means some differences were found, -// and 2 means trouble. fn main() -> ExitCode { - let opts = env::args_os(); - let params = parse_params(opts).unwrap_or_else(|error| { - eprintln!("{error}"); - exit(2); - }); - // if from and to are the same file, no need to perform any comparison - let maybe_report_identical_files = || { - if params.report_identical_files { - println!( - "Files {} and {} are identical", - params.from.to_string_lossy(), - params.to.to_string_lossy(), - ); - } - }; - if params.from == "-" && params.to == "-" - || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) - { - maybe_report_identical_files(); - return ExitCode::SUCCESS; - } + let mut args = std::env::args_os().peekable(); - // read files - fn read_file_contents(filepath: &OsString) -> io::Result> { - if filepath == "-" { - let mut content = Vec::new(); - io::stdin().read_to_end(&mut content).and(Ok(content)) - } else { - fs::read(filepath) - } - } - let mut io_error = false; - let from_content = match read_file_contents(¶ms.from) { - Ok(from_content) => from_content, - Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.from, &e); - io_error = true; - vec![] - } - }; - let to_content = match read_file_contents(¶ms.to) { - Ok(to_content) => to_content, - Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.to, &e); - io_error = true; - vec![] - } - }; - if io_error { - return ExitCode::from(2); - } + let exe_path = binary_path(&mut args); + let exe_name = name(&exe_path); - // run diff - let result: Vec = match params.format { - Format::Normal => normal_diff::diff(&from_content, &to_content, ¶ms), - Format::Unified => unified_diff::diff(&from_content, &to_content, ¶ms), - Format::Context => context_diff::diff(&from_content, &to_content, ¶ms), - Format::Ed => ed_diff::diff(&from_content, &to_content, ¶ms).unwrap_or_else(|error| { - eprintln!("{error}"); - exit(2); - }), - }; - if params.brief && !result.is_empty() { - println!( - "Files {} and {} differ", - params.from.to_string_lossy(), - params.to.to_string_lossy() - ); - } else { - io::stdout().write_all(&result).unwrap(); - } - if result.is_empty() { - maybe_report_identical_files(); - ExitCode::SUCCESS + let util_name = if exe_name == "diffutils" { + // Discard the item we peeked. + let _ = args.next(); + + args.peek() + .cloned() + .unwrap_or_else(|| second_arg_error(exe_name)) } else { - ExitCode::from(1) + OsString::from(exe_name) + }; + + match util_name.to_str() { + Some("diff") => diff::main(args), + Some("cmp") => cmp::main(args), + Some(name) => { + eprintln!("{}: utility not supported", name); + ExitCode::from(2) + } + None => second_arg_error(exe_name), } } diff --git a/src/params.rs b/src/params.rs index c671180..9b3abc4 100644 --- a/src/params.rs +++ b/src/params.rs @@ -1,4 +1,5 @@ use std::ffi::OsString; +use std::iter::Peekable; use std::path::PathBuf; use regex::Regex; @@ -41,8 +42,7 @@ impl Default for Params { } } -pub fn parse_params>(opts: I) -> Result { - let mut opts = opts.into_iter().peekable(); +pub fn parse_params>(mut opts: Peekable) -> Result { // parse CLI let Some(executable) = opts.next() else { @@ -323,7 +323,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -336,6 +341,7 @@ mod tests { [os("diff"), os("--normal"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); } @@ -350,7 +356,12 @@ mod tests { format: Format::Ed, ..Default::default() }), - parse_params([os("diff"), os(arg), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os(arg), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); } } @@ -368,7 +379,7 @@ mod tests { format: Format::Context, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } for args in [ @@ -390,7 +401,7 @@ mod tests { context_count: 42, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } } @@ -410,7 +421,7 @@ mod tests { let mut params = vec!["diff"]; params.extend(args); params.extend(["foo", "bar"]); - assert!(parse_params(params.iter().map(|x| os(x))).is_err()); + assert!(parse_params(params.iter().map(|x| os(x)).peekable()).is_err()); } } #[test] @@ -427,7 +438,7 @@ mod tests { format: Format::Unified, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } for args in [ @@ -449,7 +460,7 @@ mod tests { context_count: 42, ..Default::default() }), - parse_params(params.iter().map(|x| os(x))) + parse_params(params.iter().map(|x| os(x)).peekable()) ); } } @@ -469,7 +480,7 @@ mod tests { let mut params = vec!["diff"]; params.extend(args); params.extend(["foo", "bar"]); - assert!(parse_params(params.iter().map(|x| os(x))).is_err()); + assert!(parse_params(params.iter().map(|x| os(x)).peekable()).is_err()); } } #[test] @@ -487,6 +498,7 @@ mod tests { [os("diff"), os("-u54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -502,6 +514,7 @@ mod tests { [os("diff"), os("-U54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -517,6 +530,7 @@ mod tests { [os("diff"), os("-U"), os("54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -532,6 +546,7 @@ mod tests { [os("diff"), os("-c54"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); } @@ -544,7 +559,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -554,7 +574,12 @@ mod tests { report_identical_files: true, ..Default::default() }), - parse_params([os("diff"), os("-s"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("-s"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -573,6 +598,7 @@ mod tests { ] .iter() .cloned() + .peekable() ) ); } @@ -585,7 +611,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -595,7 +626,12 @@ mod tests { brief: true, ..Default::default() }), - parse_params([os("diff"), os("-q"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("-q"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -609,6 +645,7 @@ mod tests { [os("diff"), os("--brief"), os("foo"), os("bar"),] .iter() .cloned() + .peekable() ) ); } @@ -621,7 +658,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); for option in ["-t", "--expand-tabs"] { assert_eq!( @@ -636,6 +678,7 @@ mod tests { [os("diff"), os(option), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); } @@ -649,7 +692,12 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("bar")].iter().cloned()) + parse_params( + [os("diff"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) ); assert_eq!( Ok(Params { @@ -663,6 +711,7 @@ mod tests { [os("diff"), os("--tabsize=0"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert_eq!( @@ -677,36 +726,42 @@ mod tests { [os("diff"), os("--tabsize=42"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) ); assert!(parse_params( [os("diff"), os("--tabsize"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( [os("diff"), os("--tabsize="), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( [os("diff"), os("--tabsize=-1"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); assert!(parse_params( @@ -718,6 +773,7 @@ mod tests { ] .iter() .cloned() + .peekable() ) .is_err()); } @@ -730,7 +786,12 @@ mod tests { to: os("-h"), ..Default::default() }), - parse_params([os("diff"), os("--"), os("-g"), os("-h")].iter().cloned()) + parse_params( + [os("diff"), os("--"), os("-g"), os("-h")] + .iter() + .cloned() + .peekable() + ) ); } #[test] @@ -742,7 +803,7 @@ mod tests { to: os("-"), ..Default::default() }), - parse_params([os("diff"), os("foo"), os("-")].iter().cloned()) + parse_params([os("diff"), os("foo"), os("-")].iter().cloned().peekable()) ); assert_eq!( Ok(Params { @@ -751,7 +812,7 @@ mod tests { to: os("bar"), ..Default::default() }), - parse_params([os("diff"), os("-"), os("bar")].iter().cloned()) + parse_params([os("diff"), os("-"), os("bar")].iter().cloned().peekable()) ); assert_eq!( Ok(Params { @@ -760,27 +821,45 @@ mod tests { to: os("-"), ..Default::default() }), - parse_params([os("diff"), os("-"), os("-")].iter().cloned()) + parse_params([os("diff"), os("-"), os("-")].iter().cloned().peekable()) ); - assert!(parse_params([os("diff"), os("foo"), os("bar"), os("-")].iter().cloned()).is_err()); - assert!(parse_params([os("diff"), os("-"), os("-"), os("-")].iter().cloned()).is_err()); + assert!(parse_params( + [os("diff"), os("foo"), os("bar"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("-"), os("-"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err()); } #[test] fn missing_arguments() { - assert!(parse_params([os("diff")].iter().cloned()).is_err()); - assert!(parse_params([os("diff"), os("foo")].iter().cloned()).is_err()); + assert!(parse_params([os("diff")].iter().cloned().peekable()).is_err()); + assert!(parse_params([os("diff"), os("foo")].iter().cloned().peekable()).is_err()); } #[test] fn unknown_argument() { + assert!(parse_params( + [os("diff"), os("-g"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err()); assert!( - parse_params([os("diff"), os("-g"), os("foo"), os("bar")].iter().cloned()).is_err() + parse_params([os("diff"), os("-g"), os("bar")].iter().cloned().peekable()).is_err() ); - assert!(parse_params([os("diff"), os("-g"), os("bar")].iter().cloned()).is_err()); - assert!(parse_params([os("diff"), os("-g")].iter().cloned()).is_err()); + assert!(parse_params([os("diff"), os("-g")].iter().cloned().peekable()).is_err()); } #[test] fn empty() { - assert!(parse_params([].iter().cloned()).is_err()); + assert!(parse_params([].iter().cloned().peekable()).is_err()); } #[test] fn conflicting_output_styles() { @@ -797,6 +876,7 @@ mod tests { [os("diff"), os(arg1), os(arg2), os("foo"), os("bar")] .iter() .cloned() + .peekable() ) .is_err()); } diff --git a/src/utils.rs b/src/utils.rs index df1390d..88b39ff 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,8 +3,9 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use std::io::Write; +use std::{ffi::OsString, io::Write}; +use regex::Regex; use unicode_width::UnicodeWidthStr; /// Replace tabs by spaces in the input line. @@ -71,6 +72,33 @@ pub fn get_modification_time(file_path: &str) -> String { modification_time } +pub fn format_failure_to_read_input_file( + executable: &OsString, + filepath: &OsString, + error: &std::io::Error, +) -> String { + // std::io::Error's display trait outputs "{detail} (os error {code})" + // but we want only the {detail} (error string) part + let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap(); + format!( + "{}: {}: {}", + executable.to_string_lossy(), + filepath.to_string_lossy(), + error_code_re.replace(error.to_string().as_str(), ""), + ) +} + +pub fn report_failure_to_read_input_file( + executable: &OsString, + filepath: &OsString, + error: &std::io::Error, +) { + eprintln!( + "{}", + format_failure_to_read_input_file(executable, filepath, error) + ); +} + #[cfg(test)] mod tests { use super::*; diff --git a/tests/integration.rs b/tests/integration.rs index f8ad515..4cff8ff 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -4,298 +4,869 @@ // files that was distributed with this source code. use assert_cmd::cmd::Command; -use diffutilslib::assert_diff_eq; use predicates::prelude::*; -use std::fs::File; +use std::fs::{File, OpenOptions}; use std::io::Write; use tempfile::{tempdir, NamedTempFile}; // Integration tests for the diffutils command +mod common { + use super::*; -#[test] -fn unknown_param() -> Result<(), Box> { - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("--foobar"); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::starts_with("Unknown option: \"--foobar\"")); - Ok(()) -} + #[test] + fn unknown_param() -> Result<(), Box> { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("patch"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::eq("patch: utility not supported\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::starts_with( + "Expected utility name as second argument, got nothing.\n", + )); + + for subcmd in ["diff", "cmp"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + cmd.arg("--foobar"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::starts_with("Unknown option: \"--foobar\"")); + } + Ok(()) + } -#[test] -fn cannot_read_files() -> Result<(), Box> { - let file = NamedTempFile::new()?; + #[test] + fn cannot_read_files() -> Result<(), Box> { + let file = NamedTempFile::new()?; + + let nofile = NamedTempFile::new()?; + let nopath = nofile.into_temp_path(); + std::fs::remove_file(&nopath)?; + + #[cfg(not(windows))] + let error_message = "No such file or directory"; + #[cfg(windows)] + let error_message = "The system cannot find the file specified."; + + for subcmd in ["diff", "cmp"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + cmd.arg(&nopath).arg(file.path()); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + ))); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + cmd.arg(file.path()).arg(&nopath); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + ))); + } - let nofile = NamedTempFile::new()?; - let nopath = nofile.into_temp_path(); - std::fs::remove_file(&nopath)?; + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg(&nopath).arg(&nopath); + cmd.assert().code(predicate::eq(2)).failure().stderr( + predicate::str::contains(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + )) + .count(2), + ); - #[cfg(not(windows))] - let error_message = "No such file or directory"; - #[cfg(windows)] - let error_message = "The system cannot find the file specified."; - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg(&nopath).arg(file.path()); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::ends_with(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - ))); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg(file.path()).arg(&nopath); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::ends_with(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - ))); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg(&nopath).arg(&nopath); - cmd.assert().code(predicate::eq(2)).failure().stderr( - predicate::str::contains(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - )) - .count(2), - ); - - Ok(()) + Ok(()) + } } -#[test] -fn no_differences() -> Result<(), Box> { - let file = NamedTempFile::new()?; - for option in ["", "-u", "-c", "-e"] { - let mut cmd = Command::cargo_bin("diffutils")?; - if !option.is_empty() { - cmd.arg(option); +mod diff { + use diffutilslib::assert_diff_eq; + + use super::*; + + #[test] + fn no_differences() -> Result<(), Box> { + let file = NamedTempFile::new()?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg(file.path()).arg(file.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::is_empty()); + } + Ok(()) + } + + #[test] + fn no_differences_report_identical_files() -> Result<(), Box> { + // same file + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-s").arg(file1.path()).arg(file1.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::eq(format!( + "Files {} and {} are identical\n", + file1.path().to_string_lossy(), + file1.path().to_string_lossy(), + ))); + } + // two files with the same content + let mut file2 = NamedTempFile::new()?; + file2.write_all("foo\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-s").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::eq(format!( + "Files {} and {} are identical\n", + file1.path().to_string_lossy(), + file2.path().to_string_lossy(), + ))); + } + Ok(()) + } + + #[test] + fn differences() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::is_empty().not()); + } + Ok(()) + } + + #[test] + fn differences_brief() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-q").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::eq(format!( + "Files {} and {} differ\n", + file1.path().to_string_lossy(), + file2.path().to_string_lossy() + ))); } - cmd.arg(file.path()).arg(file.path()); + Ok(()) + } + + #[test] + fn missing_newline() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar".as_bytes())?; + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-e").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::starts_with("No newline at end of file")); + Ok(()) + } + + #[test] + fn read_from_stdin() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg(file1.path()) + .arg("-") + .write_stdin("bar\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ -\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file1.path().to_string_lossy() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg("-") + .arg(file2.path()) + .write_stdin("foo\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- -\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file2.path().to_string_lossy() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u").arg("-").arg("-"); cmd.assert() .code(predicate::eq(0)) .success() .stdout(predicate::str::is_empty()); + + #[cfg(unix)] + { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg(file1.path()) + .arg("/dev/stdin") + .write_stdin("bar\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ /dev/stdin\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file1.path().to_string_lossy() + ) + ); + } + + Ok(()) + } + + #[test] + fn compare_file_to_directory() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let directory = tmp_dir.path().join("d"); + let _ = std::fs::create_dir(&directory); + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let da_path = directory.join("a"); + let mut da = File::create(&da_path).unwrap(); + da.write_all(b"da\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u").arg(&directory).arg(&a_path); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-da\n+a\n", + da_path.display(), + a_path.display() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u").arg(&a_path).arg(&directory); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-a\n+da\n", + a_path.display(), + da_path.display() + ) + ); + + Ok(()) } - Ok(()) } -#[test] -fn no_differences_report_identical_files() -> Result<(), Box> { - // same file - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { +mod cmp { + use super::*; + + #[test] + fn cmp_incompatible_params() -> Result<(), Box> { let mut cmd = Command::cargo_bin("diffutils")?; - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-s").arg(file1.path()).arg(file1.path()); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-s"); + cmd.arg("/etc/passwd").arg("/etc/group"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with( + ": options -l and -s are incompatible\n", + )); + + Ok(()) + } + + #[test] + fn cmp_stdin() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg(&a_path); + cmd.write_stdin("a\n"); cmd.assert() .code(predicate::eq(0)) .success() - .stdout(predicate::eq(format!( - "Files {} and {} are identical\n", - file1.path().to_string_lossy(), - file1.path().to_string_lossy(), - ))); + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path); + cmd.write_stdin("b\n"); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" - differ: char 1, line 1\n")); + + Ok(()) } - // two files with the same content - let mut file2 = NamedTempFile::new()?; - file2.write_all("foo\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + + #[test] + fn cmp_equal_files() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"a\n").unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-s").arg(file1.path()).arg(file2.path()); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(0)) .success() - .stdout(predicate::eq(format!( - "Files {} and {} are identical\n", - file1.path().to_string_lossy(), - file2.path().to_string_lossy(), - ))); + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) } - Ok(()) -} -#[test] -fn differences() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + #[test] + fn cmp_one_file_empty() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let _ = File::create(&b_path).unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg(file1.path()).arg(file2.path()); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(1)) .failure() - .stdout(predicate::str::is_empty().not()); + .stderr(predicate::str::contains(" EOF on ")) + .stderr(predicate::str::ends_with(" which is empty\n")); + + Ok(()) } - Ok(()) -} -#[test] -fn differences_brief() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + #[test] + fn cmp_immediate_difference() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"bcd\n").unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-q").arg(file1.path()).arg(file2.path()); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(1)) .failure() - .stdout(predicate::eq(format!( - "Files {} and {} differ\n", - file1.path().to_string_lossy(), - file2.path().to_string_lossy() - ))); + .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 1, line 1 is 141 a 142 b\n", + )); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("1 141 142\n2 142 143\n3 143 144\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq( + "1 141 a 142 b\n2 142 b 143 c\n3 143 c 144 d\n", + )); + + Ok(()) } - Ok(()) -} -#[test] -fn missing_newline() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar".as_bytes())?; - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-e").arg(file1.path()).arg(file2.path()); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::starts_with("No newline at end of file")); - Ok(()) -} + #[test] + fn cmp_newline_difference() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc\ndefg").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abc\ndef\ng").unwrap(); -#[test] -fn read_from_stdin() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u") - .arg(file1.path()) - .arg("-") - .write_stdin("bar\n"); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ -\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file1.path().to_string_lossy() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u") - .arg("-") - .arg(file2.path()) - .write_stdin("foo\n"); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- -\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file2.path().to_string_lossy() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u").arg("-").arg("-"); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stdout(predicate::str::is_empty()); - - #[cfg(unix)] - { let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u") - .arg(file1.path()) - .arg("/dev/stdin") - .write_stdin("bar\n"); - cmd.assert().code(predicate::eq(1)).failure(); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" differ: char 8, line 2\n")); - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ /dev/stdin\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file1.path().to_string_lossy() - ) - ); + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 8, line 2 is 147 g 12 ^J\n", + )); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::starts_with("8 147 12\n")) + .stderr(predicate::str::contains(" EOF on")) + .stderr(predicate::str::ends_with(" after byte 8\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::starts_with("8 147 g 12 ^J\n")) + .stderr(predicate::str::contains(" EOF on")) + .stderr(predicate::str::ends_with(" after byte 8\n")); + + Ok(()) } - Ok(()) -} + #[test] + fn cmp_max_bytes() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc efg ijkl\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abcdefghijkl\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("4"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("4 40 144 d\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("13"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("4 40 144 d\n8 40 150 h\n")); + Ok(()) + } + + #[test] + fn cmp_skip_args_parsing() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"---abc\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"###abc\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-i"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); -#[test] -fn compare_file_to_directory() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let directory = tmp_dir.path().join("d"); - let _ = std::fs::create_dir(&directory); - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"a\n").unwrap(); - - let da_path = directory.join("a"); - let mut da = File::create(&da_path).unwrap(); - da.write_all(b"da\n").unwrap(); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u").arg(&directory).arg(&a_path); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-da\n+a\n", - da_path.display(), - a_path.display() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("-u").arg(&a_path).arg(&directory); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-a\n+da\n", - a_path.display(), - da_path.display() - ) - ); - - Ok(()) + // Positional skips should be ignored + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-i"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("1").arg("1"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + // Single positional argument should only affect first file. + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("3"); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("3"); + cmd.arg("3"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) + } + + #[test] + fn cmp_skip_suffix_parsing() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + write!(a, "{}c\n", "a".repeat(1024)).unwrap(); + a.flush().unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + write!(b, "{}c\n", "b".repeat(1024)).unwrap(); + b.flush().unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("--ignore-initial=1K"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) + } + + #[test] + fn cmp_skip() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc efg ijkl\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abcdefghijkl\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-i"); + cmd.arg("8"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg("-i"); + cmd.arg("4"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 4, line 1 is 40 150 h\n", + )); + + Ok(()) + } + + #[test] + fn cmp_binary() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let mut bytes = vec![0, 15, 31, 32, 33, 40, 64, 126, 127, 128, 129, 200, 254, 255]; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(&bytes).unwrap(); + + bytes.reverse(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(&bytes).unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::eq(concat!( + " 1 0 ^@ 377 M-^?\n", + " 2 17 ^O 376 M-~\n", + " 3 37 ^_ 310 M-H\n", + " 4 40 201 M-^A\n", + " 5 41 ! 200 M-^@\n", + " 6 50 ( 177 ^?\n", + " 7 100 @ 176 ~\n", + " 8 176 ~ 100 @\n", + " 9 177 ^? 50 (\n", + "10 200 M-^@ 41 !\n", + "11 201 M-^A 40 \n", + "12 310 M-H 37 ^_\n", + "13 376 M-~ 17 ^O\n", + "14 377 M-^? 0 ^@\n" + ))); + + Ok(()) + } + + #[test] + #[cfg(not(windows))] + fn cmp_fast_paths() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + // This test mimics one found in the GNU cmp test suite. It is used for + // validating the /dev/null optimization. + let a_path = tmp_dir.path().join("a"); + let a = File::create(&a_path).unwrap(); + a.set_len(14 * 1024 * 1024 * 1024 * 1024).unwrap(); + + let b_path = tmp_dir.path().join("b"); + let b = File::create(&b_path).unwrap(); + b.set_len(15 * 1024 * 1024 * 1024 * 1024).unwrap(); + + let dev_null = OpenOptions::new().write(true).open("/dev/null").unwrap(); + + let mut child = std::process::Command::new(assert_cmd::cargo::cargo_bin("diffutils")) + .arg("cmp") + .arg(&a_path) + .arg(&b_path) + .stdout(dev_null) + .spawn() + .unwrap(); + + std::thread::sleep(std::time::Duration::from_millis(100)); + + assert_eq!(child.try_wait().unwrap().unwrap().code(), Some(1)); + + // Two stdins should be equal + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-"); + cmd.arg("-"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::is_empty()) + .stderr(predicate::str::is_empty()); + + // Files with longer than block size equal segments should still report + // the correct line number for the difference. Assumes 8KB block size (see + // https://github.com/rust-lang/rust/blob/master/library/std/src/sys_common/io.rs), + // create a 24KB equality. + let mut bytes = " ".repeat(4095); + bytes.push('\n'); + bytes.push_str(&" ".repeat(4096)); + + let bytes = bytes.repeat(3); + let bytes = bytes.as_bytes(); + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(&bytes).unwrap(); + a.write_all(b"A").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(&bytes).unwrap(); + b.write_all(b"B").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::ends_with(" differ: byte 24577, line 4\n")); + + Ok(()) + } } diff --git a/tests/run-upstream-testsuite.sh b/tests/run-upstream-testsuite.sh index cfc20a9..7a9e69d 100755 --- a/tests/run-upstream-testsuite.sh +++ b/tests/run-upstream-testsuite.sh @@ -21,7 +21,7 @@ # (e.g. 'dev' or 'test'). # Unless overridden by the $TESTS environment variable, all tests in the test # suite will be run. Tests targeting a command that is not yet implemented -# (e.g. cmp, diff3 or sdiff) are skipped. +# (e.g. diff3 or sdiff) are skipped. scriptpath=$(dirname "$(readlink -f "$0")") rev=$(git rev-parse HEAD) @@ -57,6 +57,7 @@ upstreamrev=$(git rev-parse HEAD) mkdir src cd src ln -s "$binary" diff +ln -s "$binary" cmp cd ../tests # Fetch tests/init.sh from the gnulib repository (needed since @@ -86,9 +87,9 @@ for test in $tests do result="FAIL" url="$urlroot$test?id=$upstreamrev" - # Run only the tests that invoke `diff`, + # Run only the tests that invoke `diff` or `cmp`, # because other binaries aren't implemented yet - if ! grep -E -s -q "(cmp|diff3|sdiff)" "$test" + if ! grep -E -s -q "(diff3|sdiff)" "$test" then sh "$test" 1> stdout.txt 2> stderr.txt && result="PASS" || exitcode=1 json+="{\"test\":\"$test\",\"result\":\"$result\","