diff --git a/CHANGELOG-foldiff.md b/CHANGELOG-foldiff.md index bf303e3..6f07011 100644 --- a/CHANGELOG-foldiff.md +++ b/CHANGELOG-foldiff.md @@ -6,6 +6,10 @@ - replace `anyhow` with custom error types - write custom threading utilities +## pending +- use `camino` for better path handling internally +- remove unused dependency on serde-bytes + ## 1.3.1 - reflinks now apply for duplicated files too diff --git a/Cargo.lock b/Cargo.lock index a75341c..be796aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,6 +75,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "camino" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" + [[package]] name = "cc" version = "1.1.22" @@ -256,6 +262,7 @@ name = "foldiff" version = "1.3.1" dependencies = [ "anyhow", + "camino", "clap", "console", "dialoguer", @@ -350,6 +357,7 @@ name = "libfoldiff" version = "1.3.1" dependencies = [ "anyhow", + "camino", "countio", "derivative", "infer", @@ -359,7 +367,6 @@ dependencies = [ "reflink", "rmp-serde", "serde", - "serde_bytes", "tempfile", "twox-hash", "zstd", @@ -560,15 +567,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde_bytes" -version = "0.11.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" -dependencies = [ - "serde", -] - [[package]] name = "serde_derive" version = "1.0.210" diff --git a/Cargo.toml b/Cargo.toml index 215d19b..4ac1b7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,4 +3,7 @@ resolver = "2" members = [ "foldiff", "libfoldiff", -] \ No newline at end of file +] + +#[profile.test] +#opt-level = 3 \ No newline at end of file diff --git a/foldiff/Cargo.toml b/foldiff/Cargo.toml index aadc222..22ff7e4 100644 --- a/foldiff/Cargo.toml +++ b/foldiff/Cargo.toml @@ -14,3 +14,4 @@ dialoguer = { version = "0.11.0", features = [] } indicatif = "0.17.8" console = "0.15.8" num_cpus = "1.16.0" +camino = "1.1.9" diff --git a/foldiff/src/main.rs b/foldiff/src/main.rs index 6179782..db96ff0 100644 --- a/foldiff/src/main.rs +++ b/foldiff/src/main.rs @@ -1,7 +1,7 @@ use std::fs::File; use anyhow::{bail, ensure, Context, Result}; +use camino::{Utf8Path, Utf8PathBuf}; use clap::{Parser, Subcommand}; -use std::path::{Path, PathBuf}; use libfoldiff::FoldiffCfg; use libfoldiff::manifest::DiffManifest; @@ -105,8 +105,8 @@ fn main() -> Result<()> { level_diff: *level_diff }; - let old_root: PathBuf = old.into(); - let new_root: PathBuf = new.into(); + let old_root: Utf8PathBuf = old.into(); + let new_root: Utf8PathBuf = new.into(); // check both exist ensure!(std::fs::metadata(&old_root).context("old path must exist")?.is_dir(), "old path must be a directory"); ensure!(std::fs::metadata(&new_root).context("new path must exist")?.is_dir(), "new path must be a directory"); @@ -136,12 +136,12 @@ fn main() -> Result<()> { //println!("{diff_state:?}"); // emit the diff to disk - diff_state.write_to_file::>(Path::new(diff), &cfg)?; + diff_state.write_to_file::>(Utf8Path::new(diff), &cfg)?; } Commands::Apply { old, diff, new } => { - let old_root: PathBuf = old.into(); - let new_root: PathBuf = new.into(); + let old_root: Utf8PathBuf = old.into(); + let new_root: Utf8PathBuf = new.into(); // check existence ensure!(std::fs::metadata(&old_root).context("old path must exist")?.is_dir(), "old path must be a directory"); ensure!(std::fs::metadata(diff).context("diff must exist")?.is_file(), "diff must be a file"); @@ -158,7 +158,7 @@ fn main() -> Result<()> { std::fs::remove_dir_all(new).context("Failed to remove folder")?; } - let mut diff_state = libfoldiff::applying::read_diff_from_file(&PathBuf::from(diff))?; + let mut diff_state = libfoldiff::applying::read_diff_from_file(&Utf8PathBuf::from(diff))?; diff_state.apply::< cliutils::MultiWrapper, cliutils::Spinner, @@ -169,10 +169,10 @@ fn main() -> Result<()> { if let Some(diff) = diff { let f = File::open(diff).context("Failed to open diff file to verify with")?; let manifest = DiffManifest::read_from(f).context("Failed to read diff file to verify with")?; - libfoldiff::verify::verify_against_diff::>(Path::new(old), Path::new(new), &manifest)?; + libfoldiff::verify::verify_against_diff::>(old.as_str().into(), new.as_str().into(), &manifest)?; } else { - libfoldiff::verify::test_dir_equality::>(Path::new(old), Path::new(new))?; + libfoldiff::verify::test_dir_equality::>(old.as_str().into(), new.as_str().into())?; } }, Commands::Upgrade { new, old } => { diff --git a/libfoldiff/Cargo.toml b/libfoldiff/Cargo.toml index 0f247e5..b46584b 100644 --- a/libfoldiff/Cargo.toml +++ b/libfoldiff/Cargo.toml @@ -13,12 +13,12 @@ derivative = "2.2.0" infer = "0.16.0" rmp-serde = "1.3.0" serde = { version = "1.0.209", features = ["derive"] } -serde_bytes = "0.11.15" twox-hash = "1.6.3" zstd = { version = "0.13.2", features = ["zstdmt"] } rayon = "1.10.0" memmap2 = "0.9.4" reflink = "0.1.3" +camino = "1.1.9" [dev-dependencies] tempfile = "3.12.0" diff --git a/libfoldiff/src/applying.rs b/libfoldiff/src/applying.rs index 42abcc4..9d9cf08 100644 --- a/libfoldiff/src/applying.rs +++ b/libfoldiff/src/applying.rs @@ -7,7 +7,7 @@ use memmap2::Mmap; use rayon::prelude::*; use std::fs::File; use std::io::{Cursor, Read, Seek}; -use std::path::{Path, PathBuf}; +use camino::{Utf8Path, Utf8PathBuf}; use std::sync::Mutex; /// An in-memory representation of a diff, used for the applying process @@ -17,8 +17,8 @@ pub struct ApplyingDiff { blobs_new: Vec, // offset into diff file blobs_patch: Vec, // offset into diff file read: Option, // the diff file map - old_root: PathBuf, - new_root: PathBuf, + old_root: Utf8PathBuf, + new_root: Utf8PathBuf, } impl ApplyingDiff { @@ -26,7 +26,7 @@ impl ApplyingDiff { TWrap: ReportingMultiWrapper, TSpin: Reporter + CanBeWrappedBy + Sync, TBar: ReporterSized + CanBeWrappedBy + Sync - >(&mut self, old_root: PathBuf, new_root: PathBuf) -> anyhow::Result<()> { + >(&mut self, old_root: Utf8PathBuf, new_root: Utf8PathBuf) -> anyhow::Result<()> { self.old_root = old_root; self.new_root = new_root; @@ -297,7 +297,7 @@ impl ApplyingDiff { } /// handles initialising an in-memory applying state from disk -pub fn read_diff_from_file(path: &Path) -> anyhow::Result { +pub fn read_diff_from_file(path: &Utf8Path) -> anyhow::Result { let f = File::open(path).context("Failed to open file to read diff")?; // safety: UB if the underlying diff is modified by someone else @@ -317,7 +317,7 @@ pub fn read_diff_from(reader: &mut (impl Read + Seek)) -> anyhow::Result anyhow::Result anyhow::Result std::io::Result { +pub fn create_file(p: &Utf8Path) -> std::io::Result { if let Some(p) = p.parent() { std::fs::create_dir_all(p)?; } @@ -25,7 +25,7 @@ pub fn create_file(p: &Path) -> std::io::Result { } // Reflinks or copies a file and hashes it -pub fn copy_rl_hash(src_p: impl AsRef, dst_p: impl AsRef) -> anyhow::Result { +pub fn copy_rl_hash(src_p: impl AsRef, dst_p: impl AsRef) -> anyhow::Result { let src_p = src_p.as_ref(); let dst_p = dst_p.as_ref(); @@ -47,7 +47,7 @@ pub fn copy_rl_hash(src_p: impl AsRef, dst_p: impl AsRef) -> anyhow: } } -pub fn copy_rl(src_p: impl AsRef, dst_p: impl AsRef) -> std::io::Result<()> { +pub fn copy_rl(src_p: impl AsRef, dst_p: impl AsRef) -> std::io::Result<()> { let src_p = src_p.as_ref(); let dst_p = dst_p.as_ref(); diff --git a/libfoldiff/src/diffing.rs b/libfoldiff/src/diffing.rs index 4ce992a..d3ec2db 100644 --- a/libfoldiff/src/diffing.rs +++ b/libfoldiff/src/diffing.rs @@ -1,7 +1,7 @@ use std::collections::BTreeMap; use std::fs::File; use std::io::{copy, Seek, Write}; -use std::path::{Path, PathBuf}; +use camino::{Utf8Path, Utf8PathBuf}; use anyhow::{anyhow, bail, Context}; use rmp_serde::Serializer; use serde::Serialize; @@ -14,27 +14,27 @@ use crate::reporting::{AutoSpin, Reporter, ReporterSized}; /// An in-memory representation of a diff, used for the diff creation process #[derive(Clone, Debug, Default)] pub struct DiffingDiff { - blobs_new: Vec, - blobs_patch: Vec, - old_root: PathBuf, - new_root: PathBuf, + blobs_new: Vec, + blobs_patch: Vec, + old_root: Utf8PathBuf, + new_root: Utf8PathBuf, files: BTreeMap, // for efficient lookups, must be kept in sync - file_paths_old: BTreeMap, - file_paths_new: BTreeMap, + file_paths_old: BTreeMap, + file_paths_new: BTreeMap, } /// the looked up value of DiffingDiff::files entries #[derive(Clone, Debug)] struct DiffingFileData { - paths_old: Vec, - paths_new: Vec, + paths_old: Vec, + paths_new: Vec, inferred_mime: Option<&'static str>, } impl DiffingDiff { - pub fn new(old_root: PathBuf, new_root: PathBuf) -> Self { + pub fn new(old_root: Utf8PathBuf, new_root: Utf8PathBuf) -> Self { Self { old_root, new_root, @@ -123,7 +123,7 @@ impl DiffingDiff { Ok(()) } - pub fn write_to_file(&mut self, path: &Path, cfg: &FoldiffCfg) -> anyhow::Result<()> { + pub fn write_to_file(&mut self, path: &Utf8Path, cfg: &FoldiffCfg) -> anyhow::Result<()> { // create file let mut f = File::create_new(path).context("Failed to create file to save diff")?; @@ -139,14 +139,13 @@ impl DiffingDiff { // and figure out what blobs must be generated by write_to, and generate the manifest. // convenience func - let path_to_string = |p: &PathBuf| -> anyhow::Result { - let str = p.to_str().ok_or(anyhow!("Found a non-UTF-8 path name. Just no. Why."))?; + let path_to_string = |p: &Utf8PathBuf| -> anyhow::Result { Ok(if cfg!(windows) { // path replacement assert!(p.is_relative(), "Cannot fix separators in a non-relative path, as this is not accepted by the windows apis for verbatim paths. This should never occur as the diff manifest only contains relative paths."); - str.replace('\\', "/") + p.as_str().replace('\\', "/") } else { - str.to_string() + p.to_string() }) }; @@ -252,7 +251,7 @@ impl DiffingDiff { /// adds a new file to the diff /// you should not pass a file that is already in the diff - this will return an Err - fn add_file(&mut self, in_new: bool, path: &Path) -> anyhow::Result<()> { + fn add_file(&mut self, in_new: bool, path: &Utf8Path) -> anyhow::Result<()> { // check if the path is already there let paths = if in_new { &mut self.file_paths_new } else { &mut self.file_paths_old }; if paths.contains_key(path) { @@ -290,7 +289,7 @@ impl DiffingDiff { Ok(()) } - fn scan_internal(&mut self, dir: &Path, new: bool, spn: &impl Reporter) -> anyhow::Result<()> { + fn scan_internal(&mut self, dir: &Utf8Path, new: bool, spn: &impl Reporter) -> anyhow::Result<()> { let root = if new { &self.new_root } else { &self.old_root }; // we need to clone this, aw let root = root.clone(); @@ -309,7 +308,11 @@ impl DiffingDiff { bail!("Entry at '{:?}' is a symlink, bailing", entry.path()); } // strip the root off the front of the path else we get errors - let path = entry.path(); + let path: Utf8PathBuf = match entry.path().try_into() + { + Ok(p) => p, + Err(_) => continue, // just ignore non-UTF-8 paths! + }; let path = path.strip_prefix(&root)?; if ftype.is_dir() { // recurse @@ -325,17 +328,17 @@ impl DiffingDiff { } } -pub fn scan_to_diff(old_root: PathBuf, new_root: PathBuf) -> anyhow::Result { +pub fn scan_to_diff(old_root: Utf8PathBuf, new_root: Utf8PathBuf) -> anyhow::Result { let mut new_self = DiffingDiff::new(old_root, new_root); let spn = TSpin::new("Scanning old files"); let aspn = AutoSpin::spin(&spn); - new_self.scan_internal(Path::new(""), false, &spn)?; + new_self.scan_internal(Utf8Path::new(""), false, &spn)?; aspn.all_good(); let spn = TSpin::new("Scanning new files"); let aspn = AutoSpin::spin(&spn); - new_self.scan_internal(Path::new(""), true, &spn)?; + new_self.scan_internal(Utf8Path::new(""), true, &spn)?; aspn.all_good(); Ok(new_self) diff --git a/libfoldiff/src/hash.rs b/libfoldiff/src/hash.rs index 3ff9118..0daec80 100644 --- a/libfoldiff/src/hash.rs +++ b/libfoldiff/src/hash.rs @@ -1,7 +1,7 @@ use std::fs::File; use std::hash::Hasher; use std::io::{Read, Write}; -use std::path::Path; +use camino::Utf8Path; use twox_hash::XxHash64; #[derive(Clone, Default)] @@ -36,7 +36,7 @@ pub fn hash_stream(s: &mut impl Read) -> std::io::Result { Ok(h.finish()) } -pub fn hash_file(p: &Path) -> anyhow::Result { +pub fn hash_file(p: &Utf8Path) -> anyhow::Result { Ok(hash_stream(&mut File::open(p)?)?) } diff --git a/libfoldiff/src/reporting.rs b/libfoldiff/src/reporting.rs index 9552444..98a0499 100644 --- a/libfoldiff/src/reporting.rs +++ b/libfoldiff/src/reporting.rs @@ -71,7 +71,7 @@ impl<'a, R: Reporter+Sync> AutoSpin<'a, R> { s } - + /// finishes autospinning then calls done() on the internal object. /// mainly useful to extend the lifetime of autospin in a neater way than explicit drop(). pub fn all_good(self) { diff --git a/libfoldiff/src/verify.rs b/libfoldiff/src/verify.rs index d986a89..59c1633 100644 --- a/libfoldiff/src/verify.rs +++ b/libfoldiff/src/verify.rs @@ -5,19 +5,19 @@ use anyhow::{bail, Context, Result}; use rayon::prelude::*; use std::collections::BTreeSet; use std::fs; -use std::path::Path; +use camino::{Utf8Path, Utf8PathBuf}; use crate::reporting::{AutoSpin, Reporter}; /// Checks if two directories are identical, printing results to stdout -pub fn test_dir_equality(r1: &Path, r2: &Path) -> Result<()> { +pub fn test_dir_equality(r1: &Utf8Path, r2: &Utf8Path) -> Result<()> { let spn = TSpin::new("Scanning folders"); let aspn = AutoSpin::spin(&spn); - test_equality_internal(r1, r2, Path::new(""), &spn)?; + test_equality_internal(r1, r2, "".into(), &spn)?; aspn.all_good(); Ok(()) } -fn test_equality_internal(r1: &Path, r2: &Path, p: &Path, spn: &(impl Reporter+Sync)) -> Result<()> { +fn test_equality_internal(r1: &Utf8Path, r2: &Utf8Path, p: &Utf8Path, spn: &(impl Reporter+Sync)) -> Result<()> { // stat both paths let path1 = r1.join(p); let path2 = r2.join(p); @@ -45,8 +45,8 @@ fn test_equality_internal(r1: &Path, r2: &Path, p: &Path, spn: &(impl Reporter+S spn.suspend(|| { println!( "{:?} is a file, but {:?} is a folder, thus they mismatch.", - Path::new(r1.file_name().unwrap()).join(p), - Path::new(r2.file_name().unwrap()).join(p) + Utf8Path::new(r1.file_name().unwrap()).join(p), + Utf8Path::new(r2.file_name().unwrap()).join(p) ); }); } @@ -55,8 +55,8 @@ fn test_equality_internal(r1: &Path, r2: &Path, p: &Path, spn: &(impl Reporter+S spn.suspend(|| { println!( "{:?} is a folder, but {:?} is a file, thus they mismatch.", - Path::new(r1.file_name().unwrap()).join(p), - Path::new(r2.file_name().unwrap()).join(p) + Utf8Path::new(r1.file_name().unwrap()).join(p), + Utf8Path::new(r2.file_name().unwrap()).join(p) ); }); } @@ -66,8 +66,8 @@ fn test_equality_internal(r1: &Path, r2: &Path, p: &Path, spn: &(impl Reporter+S let files1: std::io::Result> = fs::read_dir(path1)?.collect(); let files2: std::io::Result> = fs::read_dir(path2)?.collect(); - let set1 = BTreeSet::from_iter(files1?.iter().map(|e| e.file_name())); - let set2 = BTreeSet::from_iter(files2?.iter().map(|e| e.file_name())); + let set1 = BTreeSet::::from_iter(files1?.iter().filter_map(|e| e.file_name().to_str().map(Into::into))); + let set2 = BTreeSet::::from_iter(files2?.iter().filter_map(|e| e.file_name().to_str().map(Into::into))); let mut rec_res = anyhow::Ok(()); // do the loops in parallel @@ -110,10 +110,10 @@ fn test_equality_internal(r1: &Path, r2: &Path, p: &Path, spn: &(impl Reporter+S } /// Checks if two directories match the given manifest, printing results to stdout -pub fn verify_against_diff(r1: &Path, r2: &Path, manifest: &DiffManifest) -> Result<()> { +pub fn verify_against_diff(r1: &Utf8Path, r2: &Utf8Path, manifest: &DiffManifest) -> Result<()> { let spn = TSpin::new("Verifying files"); let aspn = AutoSpin::spin(&spn); - + let errors: Vec<_> = manifest.untouched_files .par_iter()