From 70f92edcad5eb019280280a0efe9152f71d5d04a Mon Sep 17 00:00:00 2001 From: markxoe Date: Mon, 3 Jun 2024 15:04:42 +0200 Subject: [PATCH] wip: add a few tests --- Cargo.lock | 48 ++++++++++++++ Cargo.toml | 1 + src/data/algorithm/bfs.rs | 61 ++++++++++++++++++ src/data/maps/link_map.rs | 21 ++++-- src/data/maps/page_map.rs | 128 +++++++++++++++++++++++++++++++++++-- src/data/parsers/common.rs | 50 +++++++++++++++ 6 files changed, 300 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8f19500..ff63c21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -257,6 +257,22 @@ dependencies = [ "log", ] +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + [[package]] name = "fuzzy-matcher" version = "0.3.7" @@ -354,6 +370,12 @@ version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + [[package]] name = "lock_api" version = "0.4.12" @@ -494,6 +516,19 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.5.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -573,6 +608,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -626,6 +673,7 @@ dependencies = [ "log", "regex", "serde", + "tempfile", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index ddc50eb..fbde5b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,4 @@ inquire = "0.7.5" log = { version = "0.4.21", features = ["std"] } regex = { version = "1.10.4", features = ["std"] } serde = { version = "1.0.202", features = ["derive"] } +tempfile = "3.10.1" diff --git a/src/data/algorithm/bfs.rs b/src/data/algorithm/bfs.rs index 516f680..72a5466 100644 --- a/src/data/algorithm/bfs.rs +++ b/src/data/algorithm/bfs.rs @@ -50,3 +50,64 @@ pub fn find_shortest_path(start: i32, end: i32, links: &LinkMap) -> Option2->3->4 and 1->5->4 + let link_map = LinkMap::new_with_progress( + vec![(1, 2), (2, 3), (3, 4), (1, 5), (5, 4)] + .into_iter() + .collect(), + ProgressBuilder::empty(), + ); + + let path = super::find_shortest_path(1, 4, &link_map); + + assert_eq!(path, Some(vec![1, 5, 4])); + } + + #[test] + fn equal_length_uses_first_in_map() { + // path over 1->2->3->4 and 1->5->6->4 + let link_map = LinkMap::new_with_progress( + vec![(1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 4)] + .into_iter() + .collect(), + ProgressBuilder::empty(), + ); + + let path = super::find_shortest_path(1, 4, &link_map); + + assert_eq!(path, Some(vec![1, 2, 3, 4])); + } +} diff --git a/src/data/maps/link_map.rs b/src/data/maps/link_map.rs index d90f0d0..ce355da 100644 --- a/src/data/maps/link_map.rs +++ b/src/data/maps/link_map.rs @@ -21,7 +21,11 @@ impl LinkMap { let (shrink_every, progress_every) = { let links_count = links.len(); - (links_count / 1000, links_count / 1000) + if links_count < 1000 { + (1, 1) + } else { + (links_count / 1000, links_count / 1000) + } }; while let Some((from, to)) = links.pop_front() { @@ -46,11 +50,18 @@ impl LinkMap { LinkMap { forward: map } } - pub fn new(links: VecDeque) -> LinkMap { - LinkMap::new_with_progress(links, ProgressBuilder::empty()) - } - pub fn get(&self, from: i32) -> Option<&Vec> { self.forward.get(&from) } } + +#[test] +fn new_link_map() { + let links = VecDeque::from(vec![(1, 2), (1, 3), (3, 2)]); + + let map = LinkMap::new_with_progress(links, ProgressBuilder::empty()); + + assert_eq!(map.get(1), Some(&vec![2, 3])); + assert_eq!(map.get(2), None); + assert_eq!(map.get(3), Some(&vec![2])); +} diff --git a/src/data/maps/page_map.rs b/src/data/maps/page_map.rs index 9b26989..59e6a97 100644 --- a/src/data/maps/page_map.rs +++ b/src/data/maps/page_map.rs @@ -15,6 +15,7 @@ pub struct PageMap { id_to_redirect: HashMap, } +#[derive(Debug, PartialEq)] pub struct PageMapResult { pub id: i32, pub title: String, @@ -61,10 +62,6 @@ impl PageMap { } } - pub fn new(pages: VecDeque, redirect: VecDeque) -> Self { - Self::new_internal(pages, redirect, ProgressBuilder::empty()) - } - pub fn new_with_progress( pages: VecDeque, redirect: VecDeque, @@ -113,3 +110,126 @@ impl PageMap { Some(page) } } + +#[test] +fn new_page_map() { + let pages = { + let pages = vec![ + Page { + id: 1, + title: "Page 1".to_string(), + redirect: false, + }, + Page { + id: 2, + title: "Page 2".to_string(), + redirect: false, + }, + Page { + id: 3, + title: "Also Page 2".to_string(), + redirect: true, + }, + ]; + VecDeque::from(pages) + }; + + let redirects = { + let redirects = vec![Redirect { + id: 3, + title: "Page 2".to_string(), + }]; + VecDeque::from(redirects) + }; + + let map = PageMap::new_with_progress(pages, redirects, ProgressBuilder::empty()); + + assert_eq!(map.name_to_id("Page 1"), Some(1)); + assert_eq!(map.name_to_id("Page 2"), Some(2)); + assert_eq!(map.name_to_id("Also Page 2"), Some(3)); + + assert_eq!(map.id_to_name(1), Some("Page 1")); + assert_eq!(map.id_to_name(2), Some("Page 2")); + assert_eq!(map.id_to_name(3), Some("Also Page 2")); + + assert_eq!(map.id_to_redirect(1), None); + assert_eq!(map.id_to_redirect(2), None); + assert_eq!(map.id_to_redirect(3), Some(2)); + + assert_eq!( + map.lookup_title("Page 1"), + Some(PageMapResult { + id: 1, + title: "Page 1".to_string(), + redirect: None + }) + ); + assert_eq!( + map.lookup_title("Page 2"), + Some(PageMapResult { + id: 2, + title: "Page 2".to_string(), + redirect: None + }) + ); + assert_eq!( + map.lookup_title("Also Page 2"), + Some(PageMapResult { + id: 3, + title: "Also Page 2".to_string(), + redirect: Some(2) + }) + ); + + assert_eq!( + map.lookup_id(1), + Some(PageMapResult { + id: 1, + title: "Page 1".to_string(), + redirect: None + }) + ); + assert_eq!( + map.lookup_id(2), + Some(PageMapResult { + id: 2, + title: "Page 2".to_string(), + redirect: None + }) + ); + assert_eq!( + map.lookup_id(3), + Some(PageMapResult { + id: 3, + title: "Also Page 2".to_string(), + redirect: Some(2) + }) + ); + + assert_eq!( + map.resolve_by_title("Page 1"), + Some(PageMapResult { + id: 1, + title: "Page 1".to_string(), + redirect: None + }) + ); + + assert_eq!( + map.resolve_by_title("Page 2"), + Some(PageMapResult { + id: 2, + title: "Page 2".to_string(), + redirect: None + }) + ); + + assert_eq!( + map.resolve_by_title("Also Page 2"), + Some(PageMapResult { + id: 2, + title: "Page 2".to_string(), + redirect: None + }) + ); +} diff --git a/src/data/parsers/common.rs b/src/data/parsers/common.rs index c4c4d18..4140f2e 100644 --- a/src/data/parsers/common.rs +++ b/src/data/parsers/common.rs @@ -32,6 +32,10 @@ where C: Clone, C: Send, { + if threads < 2 { + panic!("Threads must be greater than or equal 2"); + } + let (tx, rx) = crossbeam_channel::bounded(0); let reader_thread = std::thread::spawn(move || { @@ -91,3 +95,49 @@ where output } + +mod test { + #[allow(unused_imports)] + use std::{ + env::temp_dir, + io::Write, + sync::{Arc, Mutex}, + }; + + #[test] + fn all_lines_are_read() { + let dir = temp_dir(); + let file_path = dir.join("test.txt"); + + // create file + { + let file = std::fs::File::create(&file_path).expect("Unable to create file"); + let mut writer = std::io::BufWriter::new(file); + for i in 0..1000 { + writeln!(writer, "{}", i).expect("Unable to write to file"); + } + } + + let call_count = Arc::new(Mutex::new(0)); + + // parse file + let result = super::parse_file_async( + file_path.to_str().unwrap().to_string(), + 2, + |line, ctx| { + *ctx.lock().unwrap() += 1; + vec![line.parse::().unwrap()] + }, + call_count.clone(), + ); + + // check that all lines are read + assert_eq!(result.len(), 1000); + for i in 0..1000 { + assert!(result.contains(&i)); + } + + // check parser call count + assert_eq!(*call_count.lock().unwrap(), 1000); + } +}