From 43c73cff98f743e0e016e6a88d0de67d50b0396c Mon Sep 17 00:00:00 2001 From: cqb13 Date: Sat, 17 Feb 2024 14:20:57 -0500 Subject: [PATCH] improved latin list and added english list --- Cargo.lock | 66 +++++++++++++++++++ Cargo.toml | 1 + .../dictionary_values.rs | 2 +- src/main.rs | 14 +++- src/use_data/mod.rs | 21 +++++- .../parsers/english_dictionary_parser.rs | 59 +++++++++++++++++ .../parsers/latin_dictionary_parser.rs | 57 +++++++++------- src/use_data/utils.rs | 36 ++++++++++ 8 files changed, 228 insertions(+), 28 deletions(-) create mode 100644 src/use_data/parsers/english_dictionary_parser.rs create mode 100644 src/use_data/utils.rs diff --git a/Cargo.lock b/Cargo.lock index f57eaf0..0338a68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,12 +2,41 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "getrandom" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "itoa" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + [[package]] name = "proc-macro2" version = "1.0.78" @@ -26,6 +55,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = 
"0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + [[package]] name = "ryu" version = "1.0.16" @@ -84,6 +143,13 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" name = "vocab-vault" version = "0.1.1" dependencies = [ + "rand", "serde", "serde_json", ] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" diff --git a/Cargo.toml b/Cargo.toml index 553eac9..6cdab13 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ categories = ["command-line-utilities"] [dependencies] serde = { version = "1.0.196", features = ["derive"] } serde_json = "1.0.113" +rand = "0.8.4" [[bin]] name = "vocab-vault" diff --git a/src/dictionary_structures/dictionary_values.rs b/src/dictionary_structures/dictionary_values.rs index 58e99e4..b1c9412 100644 --- a/src/dictionary_structures/dictionary_values.rs +++ b/src/dictionary_structures/dictionary_values.rs @@ -464,7 +464,7 @@ impl<'de> Deserialize<'de> for UniqueLatinWordInfo { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct EnglishWordInfo { pub orth: String, pub wid: i32, diff --git a/src/main.rs b/src/main.rs index ed4acd4..7216858 100644 --- a/src/main.rs +++ b/src/main.rs @@ -119,6 
+119,14 @@ fn main() { .with_help("Get words from a random position") .requires("amount"), ) + .with_arg( + Arg::new() + .with_name("display") + .with_short('d') + .with_long("display") + .with_value_name("DISPLAY") + .with_help("Will display as json"), + ) .with_arg( Arg::new() .with_name("to") @@ -169,6 +177,7 @@ fn main() { let exact = command.get_value_of("exact"); let amount = command.get_value_of("amount"); let random = command.has("random"); + let display = command.has("display"); let to = command.get_value_of("to"); if type_of_words != "english" @@ -236,7 +245,9 @@ fn main() { ArgValue::Missing(_) => None, }; - get_list(word_type, pos_list, max, min, exact, amount, random, to); + get_list( + word_type, pos_list, max, min, exact, amount, random, display, to, + ); } "help" => { cli.help(); @@ -247,6 +258,7 @@ fn main() { } } +//TODO: get dictionaries here, to not repeat getting them for each word fn latin_to_english( latin_text: &str, max: usize, diff --git a/src/use_data/mod.rs b/src/use_data/mod.rs index 5256a37..7d4f048 100644 --- a/src/use_data/mod.rs +++ b/src/use_data/mod.rs @@ -1,12 +1,17 @@ -use crate::dictionary_structures::dictionary_keys::PartOfSpeech; -use crate::dictionary_structures::dictionary_values::{LatinWordInfo, EnglishWordInfo}; +use self::parsers::english_dictionary_parser::parse_english_dictionary; use self::parsers::latin_dictionary_parser::parse_latin_dictionary; +use crate::dictionary_structures::dictionary_keys::PartOfSpeech; +use crate::dictionary_structures::dictionary_values::{EnglishWordInfo, LatinWordInfo}; +use serde::Serialize; use serde_json; mod parsers { + pub mod english_dictionary_parser; pub mod latin_dictionary_parser; } +mod utils; + #[derive(Debug)] pub enum WordType { English, @@ -41,6 +46,8 @@ impl WordType { } } +#[derive(Debug, Serialize)] +#[serde(untagged)] pub enum OutputList { Latin(Vec), English(Vec), @@ -54,14 +61,22 @@ pub fn get_list( exact: Option, amount: Option, random: bool, + display: bool, to: 
Option, ) { let list: OutputList = match word_type { WordType::Latin => { let list = parse_latin_dictionary(pos_list, max, min, exact, amount, random); - println!("{}", serde_json::to_string_pretty(&list).unwrap()); OutputList::Latin(list) } + WordType::English => { + let list = parse_english_dictionary(pos_list, max, min, exact, amount, random); + OutputList::English(list) + } _ => unimplemented!(), }; + + if display { + println!("{}", serde_json::to_string_pretty(&list).unwrap()); + } } diff --git a/src/use_data/parsers/english_dictionary_parser.rs b/src/use_data/parsers/english_dictionary_parser.rs new file mode 100644 index 0000000..d39016f --- /dev/null +++ b/src/use_data/parsers/english_dictionary_parser.rs @@ -0,0 +1,59 @@ +use crate::dictionary_structures::dictionary_keys::PartOfSpeech; +use crate::dictionary_structures::dictionary_values::EnglishWordInfo; +use crate::use_data::utils::word_fits_filters; +use crate::utils::data::get_english_dictionary; +use rand::Rng; + +pub fn parse_english_dictionary( + pos_list: Option>, + max: Option, + min: Option, + exact: Option, + amount: Option, + random: bool, +) -> Vec { + let english_dictionary = get_english_dictionary(); + let mut english_word_info_list: Vec = Vec::new(); + + if let Some(amount) = amount { + if random { + let mut rng = rand::thread_rng(); + while english_word_info_list.len() as i32 != amount { + let random_index = rng.gen_range(0..english_dictionary.len()); + let word_at_index = english_dictionary[random_index].clone(); + if !word_fits_filters( + &word_at_index.orth, + &word_at_index.pos, + &pos_list, + &max, + &min, + &exact, + ) { + continue; + } + english_word_info_list.push(word_at_index); + } + } else { + for word in english_dictionary { + if !word_fits_filters(&word.orth, &word.pos, &pos_list, &max, &min, &exact) { + continue; + } + + english_word_info_list.push(word); + if english_word_info_list.len() as i32 == amount { + break; + } + } + } + } else { + for word in english_dictionary { + 
if !word_fits_filters(&word.orth, &word.pos, &pos_list, &max, &min, &exact) { + continue; + } + + english_word_info_list.push(word); + } + } + + english_word_info_list +} diff --git a/src/use_data/parsers/latin_dictionary_parser.rs b/src/use_data/parsers/latin_dictionary_parser.rs index 8fc655d..173c7ec 100644 --- a/src/use_data/parsers/latin_dictionary_parser.rs +++ b/src/use_data/parsers/latin_dictionary_parser.rs @@ -1,7 +1,10 @@ use crate::dictionary_structures::dictionary_keys::PartOfSpeech; use crate::dictionary_structures::dictionary_values::LatinWordInfo; +use crate::use_data::utils::word_fits_filters; use crate::utils::data::get_latin_dictionary; +use rand::Rng; +//TODO: Generate principal parts, and check for extension senses in parse. pub fn parse_latin_dictionary( pos_list: Option>, max: Option, @@ -13,36 +16,44 @@ pub fn parse_latin_dictionary( let latin_dictionary = get_latin_dictionary(); let mut latin_word_info_list: Vec = Vec::new(); - for word in latin_dictionary { - if let Some(pos_list) = &pos_list { - if !pos_list.contains(&word.pos) { - continue; + if let Some(amount) = amount { + if random { + let mut rng = rand::thread_rng(); + while latin_word_info_list.len() as i32 != amount { + let random_index = rng.gen_range(0..latin_dictionary.len()); + let word_at_index = latin_dictionary[random_index].clone(); + if !word_fits_filters( + &word_at_index.orth, + &word_at_index.pos, + &pos_list, + &max, + &min, + &exact, + ) { + continue; + } + latin_word_info_list.push(word_at_index); } - } - - if let Some(max) = max { - if word.orth.len() > max as usize { - continue; + } else { + for word in latin_dictionary { + if !word_fits_filters(&word.orth, &word.pos, &pos_list, &max, &min, &exact) { + continue; + } + + latin_word_info_list.push(word); + if latin_word_info_list.len() as i32 == amount { + break; + } } } - - if let Some(min) = min { - if word.orth.len() < min as usize { + } else { + for word in latin_dictionary { + if !word_fits_filters(&word.orth,
&word.pos, &pos_list, &max, &min, &exact) { continue; } - } - if let Some(exact) = exact { - if word.orth.len() != exact as usize { - continue; - } + latin_word_info_list.push(word); } - - latin_word_info_list.push(word); - } - - if let Some(amount) = amount { - latin_word_info_list.truncate(amount as usize); } latin_word_info_list diff --git a/src/use_data/utils.rs b/src/use_data/utils.rs new file mode 100644 index 0000000..3af67dd --- /dev/null +++ b/src/use_data/utils.rs @@ -0,0 +1,36 @@ +use crate::dictionary_structures::dictionary_keys::PartOfSpeech; + +pub fn word_fits_filters( + word_orth: &str, + word_pos: &PartOfSpeech, + pos_list: &Option>, + max: &Option, + min: &Option, + exact: &Option, +) -> bool { + if let Some(pos_list) = pos_list { + if !pos_list.contains(word_pos) { + return false; + } + } + + if let Some(max) = max { + if word_orth.len() > *max as usize { + return false; + } + } + + if let Some(min) = min { + if word_orth.len() < *min as usize { + return false; + } + } + + if let Some(exact) = exact { + if word_orth.len() != *exact as usize { + return false; + } + } + + true +}