From fd66013d23ff1620f73b3b55597fa86d7a538f41 Mon Sep 17 00:00:00 2001 From: cqb13 Date: Wed, 21 Feb 2024 10:48:37 -0500 Subject: [PATCH] added modifier and stem parsing to get list --- src/use_data/mod.rs | 26 ++++++++- src/use_data/parsers/modifiers_parser.rs | 72 ++++++++++++++++++++++++ src/use_data/parsers/stem_parser.rs | 59 +++++++++++++++++++ 3 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 src/use_data/parsers/modifiers_parser.rs create mode 100644 src/use_data/parsers/stem_parser.rs diff --git a/src/use_data/mod.rs b/src/use_data/mod.rs index f90911a..8ffd741 100644 --- a/src/use_data/mod.rs +++ b/src/use_data/mod.rs @@ -2,13 +2,16 @@ use self::parsers::attachment_parser::parse_attachments; use self::parsers::english_dictionary_parser::parse_english_dictionary; use self::parsers::latin_dictionary_parser::parse_latin_dictionary; use self::parsers::latin_inflection_parser::parse_latin_inflections; +use self::parsers::modifiers_parser::parse_modifiers; +use self::parsers::stem_parser::parse_latin_stems; use self::parsers::unique_latin_dictionary_parser::parse_unique_latin_words; use crate::dictionary_structures::dictionary_keys::PartOfSpeech; use crate::dictionary_structures::dictionary_values::{ - Attachment, EnglishWordInfo, Inflection, LatinWordInfo, + Attachment, EnglishWordInfo, Inflection, LatinWordInfo, Modifier, Stem, }; use crate::utils::data::{ - get_latin_not_packons, get_latin_packons, get_latin_tackons, get_latin_tickons, + get_latin_not_packons, get_latin_packons, get_latin_prefixes, get_latin_suffixes, + get_latin_tackons, get_latin_tickons, }; use serde::Serialize; use serde_json; @@ -18,6 +21,8 @@ mod parsers { pub mod english_dictionary_parser; pub mod latin_dictionary_parser; pub mod latin_inflection_parser; + pub mod modifiers_parser; + pub mod stem_parser; pub mod unique_latin_dictionary_parser; } @@ -64,6 +69,8 @@ pub enum OutputList { English(Vec), Inflections(Vec), Attachment(Vec), + Modifiers(Vec), + Stems(Vec), } pub fn get_list( @@ -100,6 +107,20 @@ pub fn get_list( let list = parse_attachments(attachments, None, max, min, exact, amount, random); OutputList::Attachment(list) } + WordType::Prefixes => { + let modifiers = get_latin_prefixes(); + let list = parse_modifiers(modifiers, pos_list, max, min, exact, amount, random); + OutputList::Modifiers(list) + } + WordType::Stems => { + let list = parse_latin_stems(pos_list, max, min, exact, amount, random); + OutputList::Stems(list) + } + WordType::Suffixes => { + let modifiers = get_latin_suffixes(); + let list = parse_modifiers(modifiers, pos_list, max, min, exact, amount, random); + OutputList::Modifiers(list) + } WordType::Tackons => { let attachments = get_latin_tackons(); let list = parse_attachments(attachments, None, max, min, exact, amount, random); @@ -114,7 +135,6 @@ pub fn get_list( let list = parse_unique_latin_words(pos_list, max, min, exact, amount, random); OutputList::Latin(list) } - _ => unimplemented!(), }; if display { println!("{}", serde_json::to_string_pretty(&list).unwrap()); diff --git a/src/use_data/parsers/modifiers_parser.rs b/src/use_data/parsers/modifiers_parser.rs new file mode 100644 index 0000000..9e112b9 --- /dev/null +++ b/src/use_data/parsers/modifiers_parser.rs @@ -0,0 +1,72 @@ +use crate::dictionary_structures::dictionary_keys::PartOfSpeech; +use crate::dictionary_structures::dictionary_values::Modifier; +use crate::use_data::utils::word_fits_filters; +use rand::Rng; + +pub fn parse_modifiers( + modifiers: Vec, + pos_list: Option>, + max: Option, + min: Option, + exact: Option, + amount: Option, + random: bool, +) -> Vec { + let mut modifier_list: Vec = Vec::new(); + + if let Some(amount) = amount { + if random { + let mut rng = rand::thread_rng(); + while modifier_list.len() as i32 != amount { + let random_index = rng.gen_range(0..modifiers.len()); + let modifier_at_index = modifiers[random_index].clone(); + if !word_fits_filters( + &modifier_at_index.orth, + &modifier_at_index.pos, + &pos_list, + &max, + &min, + &exact, + ) { + continue; + } + modifier_list.push(modifier_at_index); + } + } else { + for modifier in modifiers { + if !word_fits_filters( + &modifier.orth, + &modifier.pos, + &pos_list, + &max, + &min, + &exact, + ) { + continue; + } + + modifier_list.push(modifier); + if modifier_list.len() as i32 == amount { + break; + } + } + } + } else { + for modifier in modifiers { + if !word_fits_filters( + &modifier.orth, + &modifier.pos, + &pos_list, + &max, + &min, + &exact, + ) { + continue; + } + + modifier_list.push(modifier); + } + } + + modifier_list +} diff --git a/src/use_data/parsers/stem_parser.rs b/src/use_data/parsers/stem_parser.rs new file mode 100644 index 0000000..edddb50 --- /dev/null +++ b/src/use_data/parsers/stem_parser.rs @@ -0,0 +1,59 @@ +use crate::dictionary_structures::dictionary_keys::PartOfSpeech; +use crate::dictionary_structures::dictionary_values::Stem; +use crate::use_data::utils::word_fits_filters; +use crate::utils::data::get_latin_stems; +use rand::Rng; + +pub fn parse_latin_stems( + pos_list: Option>, + max: Option, + min: Option, + exact: Option, + amount: Option, + random: bool, +) -> Vec { + let latin_stems = get_latin_stems(); + let mut stem_list: Vec = Vec::new(); + + if let Some(amount) = amount { + if random { + let mut rng = rand::thread_rng(); + while stem_list.len() as i32 != amount { + let random_index = rng.gen_range(0..latin_stems.len()); + let stem_at_index = latin_stems[random_index].clone(); + if !word_fits_filters( + &stem_at_index.orth, + &stem_at_index.pos, + &pos_list, + &max, + &min, + &exact, + ) { + continue; + } + stem_list.push(stem_at_index); + } + } else { + for stem in latin_stems { + if !word_fits_filters(&stem.orth, &stem.pos, &pos_list, &max, &min, &exact) { + continue; + } + + stem_list.push(stem); + if stem_list.len() as i32 == amount { + break; + } + } + } + } else { + for stem in latin_stems { + if !word_fits_filters(&stem.orth, &stem.pos, &pos_list, &max, &min, &exact) { + continue; + } + + stem_list.push(stem); + } + } + + stem_list +}