From 207b75b9bd32f0660b680bbde40bb63b76f9afb6 Mon Sep 17 00:00:00 2001 From: willcl-ark Date: Tue, 4 Jun 2024 20:33:59 +0100 Subject: [PATCH] cli: add support for path globbing to ignore_paths --- Cargo.lock | 7 ++++++ Cargo.toml | 1 + README.md | 4 ++-- src/cli.rs | 50 ++++++++++++++++++++++++++++++++++++++++- tests/file_traversal.rs | 26 ++++++++++++++++++++- 5 files changed, 84 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c3b5211..2d68a70 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -808,6 +808,12 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "gloo-timers" version = "0.2.6" @@ -1127,6 +1133,7 @@ dependencies = [ "colored", "criterion", "futures", + "glob", "lazy_static", "log", "ntest", diff --git a/Cargo.toml b/Cargo.toml index 5547357..d47b9b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ pulldown-cmark = "0.9.6" toml = "0.8.13" serde = { version = "1.0.202", features = ["derive"] } url-escape = "0.1.1" +glob = "0.3.1" [dev-dependencies] ntest = "0.9.2" diff --git a/README.md b/README.md index c1e93e0..e80646f 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ The following arguments are available: | `--offline` | `-o` | Do not check any web links. Renamed from `--no-web-links` which is still an alias for downwards compatibility | | `--match-file-extension` | `-e` | Set the flag, if the file extension shall be checked as well. For example the following markup link `[link](dir/file)` matches if for example a file called `file.md` exists in `dir`, but would fail when the `--match-file-extension` flag is set. 
| | `--version` | `-V` | Print current version of mlc | -| `--ignore-path` | `-p` | Comma separated list of directories or files which shall be ignored. For example | +| `--ignore-path` | `-p` | Comma separated list of directories or files which shall be ignored. Paths containing `*` are expanded as glob patterns. For example: `--ignore-path "./ignore-me","./src","./build-*"` | | `--ignore-links` | `-i` | Comma separated list of links which shall be ignored. Use simple `?` and `*` wildcards. For example `--ignore-links "http*://crates.io*"` will skip all links to the crates.io website. See the [used lib](https://github.com/becheran/wildmatch) for more information. | | `--markup-types` | `-t` | Comma separated list list of markup types which shall be checked [possible values: md, html] | | `--root-dir` | `-r` | All links to the file system starting with a slash on linux or backslash on windows will use another virtual root dir. For example the link in a file `[link](/dir/other/file.md)` checked with the cli arg `--root-dir /env/another/dir` will let *mlc* check the existence of `/env/another/dir/dir/other/file.md`. 
| @@ -137,7 +137,7 @@ offline = true # Check the exact file extension when searching for a file match-file-extension= true # List of files and directories which will be ignored -ignore-path=["./ignore-me","./src"] +ignore-path=["./ignore-me","./src","./build-*"] # List of links which will be ignored ignore-links=["http://ignore-me.de/*","http://*.ignoresub-domain/*"] # List of markup types which shall be checked diff --git a/src/cli.rs b/src/cli.rs index 8e18717..09259f5 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -3,9 +3,11 @@ use crate::Config; use crate::OptionalConfig; use clap::Arg; use clap::ArgAction; +use glob::glob_with; +use glob::MatchOptions; use std::fs; -use std::path::Path; use std::path::MAIN_SEPARATOR; +use std::path::{Path, PathBuf}; const CONFIG_FILE_PATH: &str = "./.mlc.toml"; @@ -146,6 +148,11 @@ pub fn parse_args() -> Config { if let Some(ignore_links) = matches.get_many::("ignore-links") { opt.ignore_links = Some(ignore_links.map(|x| x.to_string()).collect()); } + let options = MatchOptions { + case_sensitive: true, + require_literal_separator: false, + require_literal_leading_dot: false, + }; if let Some(ignore_path) = matches.get_many::("ignore-path") { opt.ignore_path = Some(ignore_path.map(|x| Path::new(x).to_path_buf()).collect()); @@ -178,3 +185,44 @@ pub fn parse_args() -> Config { optional: opt, } } + +pub fn collect_ignore_paths<'a, I>(ignore_paths: I, options: MatchOptions) -> Vec +where + I: Iterator, +{ + let mut collected_paths = Vec::new(); + + for x in ignore_paths { + if x.contains('*') { + collected_paths.extend(handle_glob_path(x, options)); + } else { + collected_paths.push(handle_literal_path(x)); + } + } + + collected_paths +} + +fn handle_glob_path(pattern: &str, options: MatchOptions) -> Vec { + let mut paths = Vec::new(); + + for entry in glob_with(pattern, options).unwrap() { + match entry { + Ok(p) => match fs::canonicalize(&p) { + Ok(pa) => paths.push(pa), + Err(e) => panic!("Ignore path {:?} not found. 
{:?}.", &p, e), + }, + Err(e) => panic!("Ignore path not found. {:?}.", e), + } + } + + paths +} + +fn handle_literal_path(path_str: &str) -> PathBuf { + let path = Path::new(path_str).to_path_buf(); + match fs::canonicalize(&path) { + Ok(p) => p, + Err(e) => panic!("Ignore path {:?} not found. {:?}.", &path, e), + } +} diff --git a/tests/file_traversal.rs b/tests/file_traversal.rs index 061cc14..711e0e7 100644 --- a/tests/file_traversal.rs +++ b/tests/file_traversal.rs @@ -1,9 +1,10 @@ #[cfg(test)] +use mlc::cli::collect_ignore_paths; use mlc::file_traversal; use mlc::markup::{MarkupFile, MarkupType}; use mlc::Config; use mlc::OptionalConfig; -use std::path::Path; +use std::path::{Path, PathBuf}; #[test] fn find_markdown_files() { @@ -36,3 +37,26 @@ fn empty_folder() { file_traversal::find(&config, &mut result); assert!(result.is_empty()); } + +#[test] +fn glob_test() { + let options = glob::MatchOptions { + case_sensitive: true, + require_literal_separator: false, + require_literal_leading_dot: false, + }; + + let dir = PathBuf::from("./benches/benchmark"); + let c_dir = std::fs::canonicalize(dir).expect("Canonicalize failed"); + + let glob_dir = "./benches/ben*".to_string(); + let ignore_paths = vec![&glob_dir]; + + let collected_paths = collect_ignore_paths(ignore_paths.into_iter(), options); + + assert!( + collected_paths.contains(&c_dir), + "The expected globbed path is not in the collected paths: {:?}", + collected_paths + ); +}