Skip to content

Commit

Permalink
improved latin list and added english list
Browse files Browse the repository at this point in the history
  • Loading branch information
cqb13 committed Feb 17, 2024
1 parent 94ce3c7 commit 43c73cf
Show file tree
Hide file tree
Showing 8 changed files with 228 additions and 28 deletions.
66 changes: 66 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ categories = ["command-line-utilities"]
[dependencies]
serde = { version = "1.0.196", features = ["derive"] }
serde_json = "1.0.113"
rand = "0.8.4"

[[bin]]
name = "vocab-vault"
Expand Down
2 changes: 1 addition & 1 deletion src/dictionary_structures/dictionary_values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ impl<'de> Deserialize<'de> for UniqueLatinWordInfo {
}
}

#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct EnglishWordInfo {
pub orth: String,
pub wid: i32,
Expand Down
14 changes: 13 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,14 @@ fn main() {
.with_help("Get words from a random position")
.requires("amount"),
)
.with_arg(
Arg::new()
.with_name("display")
.with_short('d')
.with_long("display")
.with_value_name("DISPLAY")
.with_help("Will display as json"),
)
.with_arg(
Arg::new()
.with_name("to")
Expand Down Expand Up @@ -169,6 +177,7 @@ fn main() {
let exact = command.get_value_of("exact");
let amount = command.get_value_of("amount");
let random = command.has("random");
let display = command.has("display");
let to = command.get_value_of("to");

if type_of_words != "english"
Expand Down Expand Up @@ -236,7 +245,9 @@ fn main() {
ArgValue::Missing(_) => None,
};

get_list(word_type, pos_list, max, min, exact, amount, random, to);
get_list(
word_type, pos_list, max, min, exact, amount, random, display, to,
);
}
"help" => {
cli.help();
Expand All @@ -247,6 +258,7 @@ fn main() {
}
}

//TODO: get dictionaries here, to not repeat getting them for each word
fn latin_to_english(
latin_text: &str,
max: usize,
Expand Down
21 changes: 18 additions & 3 deletions src/use_data/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
use crate::dictionary_structures::dictionary_keys::PartOfSpeech;
use crate::dictionary_structures::dictionary_values::{LatinWordInfo, EnglishWordInfo};
use self::parsers::english_dictionary_parser::parse_english_dictionary;
use self::parsers::latin_dictionary_parser::parse_latin_dictionary;
use crate::dictionary_structures::dictionary_keys::PartOfSpeech;
use crate::dictionary_structures::dictionary_values::{EnglishWordInfo, LatinWordInfo};
use serde::Serialize;
use serde_json;

// Private grouping module for the dictionary parsers. Each submodule lives in
// its own file under `parsers/` and provides the function that builds the
// filtered word list for one language.
mod parsers {
// Provides `parse_english_dictionary`.
pub mod english_dictionary_parser;
// Provides `parse_latin_dictionary`.
pub mod latin_dictionary_parser;
}

mod utils;

#[derive(Debug)]
pub enum WordType {
English,
Expand Down Expand Up @@ -41,6 +46,8 @@ impl WordType {
}
}

#[derive(Debug, Serialize)]
#[serde(untagged)]
pub enum OutputList {
Latin(Vec<LatinWordInfo>),
English(Vec<EnglishWordInfo>),
Expand All @@ -54,14 +61,22 @@ pub fn get_list(
exact: Option<i32>,
amount: Option<i32>,
random: bool,
display: bool,
to: Option<String>,
) {
let list: OutputList = match word_type {
WordType::Latin => {
let list = parse_latin_dictionary(pos_list, max, min, exact, amount, random);
println!("{}", serde_json::to_string_pretty(&list).unwrap());
OutputList::Latin(list)
}
WordType::English => {
let list = parse_english_dictionary(pos_list, max, min, exact, amount, random);
OutputList::English(list)
}
_ => unimplemented!(),
};

if display {
println!("{}", serde_json::to_string_pretty(&list).unwrap());
}
}
59 changes: 59 additions & 0 deletions src/use_data/parsers/english_dictionary_parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use crate::dictionary_structures::dictionary_keys::PartOfSpeech;
use crate::dictionary_structures::dictionary_values::EnglishWordInfo;
use crate::use_data::utils::word_fits_filters;
use crate::utils::data::get_english_dictionary;
use rand::Rng;

/// Builds the list of English dictionary entries that pass the given filters.
///
/// * `pos_list`, `max`, `min`, `exact` - forwarded unchanged to
///   `word_fits_filters` for every entry; `None` disables that filter.
/// * `amount` - upper bound on the number of entries returned. `None` returns
///   every matching entry; zero or a negative value returns an empty list.
/// * `random` - only meaningful together with `amount`. When `true`, the
///   entries are drawn uniformly at random (with replacement, so the same
///   entry may appear more than once) from the matching entries instead of
///   being taken from the start of the dictionary.
///
/// Returns an empty list when nothing matches the filters, including in
/// random mode.
pub fn parse_english_dictionary(
    pos_list: Option<Vec<PartOfSpeech>>,
    max: Option<i32>,
    min: Option<i32>,
    exact: Option<i32>,
    amount: Option<i32>,
    random: bool,
) -> Vec<EnglishWordInfo> {
    // Lazy view over the entries that pass every filter, in dictionary order.
    let matching_words = get_english_dictionary()
        .into_iter()
        .filter(|word| word_fits_filters(&word.orth, &word.pos, &pos_list, &max, &min, &exact));

    let amount = match amount {
        // Clamp before casting: a negative i32 would wrap to a huge usize.
        Some(amount) => amount.max(0) as usize,
        None => return matching_words.collect(),
    };

    if !random {
        // `take` stops scanning the dictionary as soon as enough words are found.
        return matching_words.take(amount).collect();
    }

    // Filter first, then sample from the survivors. Sampling the whole
    // dictionary and rejecting misses never terminates when nothing matches.
    let candidates: Vec<EnglishWordInfo> = matching_words.collect();
    if candidates.is_empty() {
        // `gen_range` panics on an empty range, so bail out before sampling.
        return Vec::new();
    }

    let mut rng = rand::thread_rng();
    (0..amount)
        .map(|_| candidates[rng.gen_range(0..candidates.len())].clone())
        .collect()
}
57 changes: 34 additions & 23 deletions src/use_data/parsers/latin_dictionary_parser.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use crate::dictionary_structures::dictionary_keys::PartOfSpeech;
use crate::dictionary_structures::dictionary_values::LatinWordInfo;
use crate::use_data::utils::word_fits_filters;
use crate::utils::data::get_latin_dictionary;
use rand::Rng;

//TODO: Generate principal parts, and check for extension senses in parse.
pub fn parse_latin_dictionary(
pos_list: Option<Vec<PartOfSpeech>>,
max: Option<i32>,
Expand All @@ -13,36 +16,44 @@ pub fn parse_latin_dictionary(
let latin_dictionary = get_latin_dictionary();
let mut latin_word_info_list: Vec<LatinWordInfo> = Vec::new();

for word in latin_dictionary {
if let Some(pos_list) = &pos_list {
if !pos_list.contains(&word.pos) {
continue;
if let Some(amount) = amount {
if random {
let mut rng = rand::thread_rng();
while latin_word_info_list.len() as i32 != amount {
let random_index = rng.gen_range(0..latin_dictionary.len());
let word_at_index = latin_dictionary[random_index].clone();
if !word_fits_filters(
&word_at_index.orth,
&word_at_index.pos,
&pos_list,
&max,
&min,
&exact,
) {
continue;
}
latin_word_info_list.push(word_at_index);
}
}

if let Some(max) = max {
if word.orth.len() > max as usize {
continue;
} else {
for word in latin_dictionary {
if !word_fits_filters(&word.orth, &word.pos, &pos_list, &max, &min, &exact) {
continue;
}

latin_word_info_list.push(word);
if latin_word_info_list.len() as i32 == amount {
break;
}
}
}

if let Some(min) = min {
if word.orth.len() < min as usize {
} else {
for word in latin_dictionary {
if !word_fits_filters(&word.orth, &word.pos, &pos_list, &max, &min, &exact) {
continue;
}
}

if let Some(exact) = exact {
if word.orth.len() != exact as usize {
continue;
}
latin_word_info_list.push(word);
}

latin_word_info_list.push(word);
}

if let Some(amount) = amount {
latin_word_info_list.truncate(amount as usize);
}

latin_word_info_list
Expand Down
36 changes: 36 additions & 0 deletions src/use_data/utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use crate::dictionary_structures::dictionary_keys::PartOfSpeech;

/// Reports whether a word passes the part-of-speech and length filters.
///
/// * `word_orth` - spelling of the word; its length is measured in characters.
/// * `word_pos` - part of speech of the word.
/// * `pos_list` - when `Some`, the word's part of speech must be in the list.
/// * `max` - when `Some`, the word may be at most this many characters long.
/// * `min` - when `Some`, the word must be at least this many characters long.
/// * `exact` - when `Some`, the word must be exactly this many characters long.
///
/// A filter set to `None` is not applied. Returns `true` only if every
/// supplied filter accepts the word.
pub fn word_fits_filters(
    word_orth: &str,
    word_pos: &PartOfSpeech,
    pos_list: &Option<Vec<PartOfSpeech>>,
    max: &Option<i32>,
    min: &Option<i32>,
    exact: &Option<i32>,
) -> bool {
    if let Some(pos_list) = pos_list {
        if !pos_list.contains(word_pos) {
            return false;
        }
    }

    // Count characters rather than bytes so non-ASCII letters count once, and
    // compare in i64 so a negative bound is not wrapped into a huge usize
    // (which would turn `max = -1` into "no limit" and `min = -1` into
    // "reject everything").
    let length = word_orth.chars().count() as i64;

    max.map_or(true, |max| length <= i64::from(max))
        && min.map_or(true, |min| length >= i64::from(min))
        && exact.map_or(true, |exact| length == i64::from(exact))
}

0 comments on commit 43c73cf

Please sign in to comment.