From 4e8b36308e057e3874b978aef13bb2becced0c94 Mon Sep 17 00:00:00 2001 From: cqb13 Date: Fri, 20 Sep 2024 17:51:25 -0400 Subject: [PATCH] made some small improvements --- src/translators/latin_to_english/parser.rs | 21 +- wiki/dictionary-data-structure.md | 48 ++++ wiki/dictionary-keys.md | 247 +++++++++++++++++++++ wiki/output-structure.md | 188 ++++++++++++++++ 4 files changed, 494 insertions(+), 10 deletions(-) create mode 100644 wiki/dictionary-data-structure.md create mode 100644 wiki/dictionary-keys.md create mode 100644 wiki/output-structure.md diff --git a/src/translators/latin_to_english/parser.rs b/src/translators/latin_to_english/parser.rs index f78eb68..568cc07 100644 --- a/src/translators/latin_to_english/parser.rs +++ b/src/translators/latin_to_english/parser.rs @@ -7,16 +7,17 @@ use crate::translators::latin_to_english::LatinTranslationInfo; use crate::utils::data::{get_latin_inflections, get_latin_stems, get_unique_latin_words}; pub fn parse(latin_word: &str, reduced: bool) -> Option> { - match parse_unique_latin_words(latin_word) { - Some(unique_word) => { - let mut translation = LatinTranslationInfo::new(); - translation.word = unique_word; - return Some(vec![translation]); - } - None => (), + match find_form(latin_word, reduced) { + Some(form) => return Some(form), + None => match parse_unique_latin_words(latin_word) { + Some(unique_word) => { + let mut translation = LatinTranslationInfo::new(); + translation.word = unique_word; + return Some(vec![translation]); + } + None => None, + }, } - - find_form(latin_word, reduced) } fn parse_unique_latin_words(latin_word: &str) -> Option { @@ -112,7 +113,7 @@ fn check_stems( } }; - //???: Weird issue here where some words get inflections but should not (cur) + //TODO: Weird issue here where some words get inflections but should not (cur) if n_from_stem.len() == 1 && n_from_stem[0] != n_from_inflection[0] { continue; } diff --git a/wiki/dictionary-data-structure.md b/wiki/dictionary-data-structure.md new 
file mode 100644 index 0000000..bf8a99c --- /dev/null +++ b/wiki/dictionary-data-structure.md @@ -0,0 +1,48 @@ +# Dictionary Data Structure + +The dictionary is a collection of JSON files, each containing an array of JSON objects. + +## English Words + +```json +{ + "orth": "word", + "wid": 0, + "pos": "part of speech", + "frequency_type": "very frequent", + "true_frequency": 0, + "frequency": 0, + "compound": 0, + "semi": 0 +} +``` + +orth: the word in English +wid: the id of the corresponding Latin word +pos: part of speech +frequency_type: frequency of the word +true_frequency: frequency + compound - semi +frequency: frequency of the word (A=>70, B=>60, C=>50, D=>40, E=>30, F=>20) +compound: compound word ('very tall' vs. 'tall') yes(0)/no(10) +semi: part of a meaning set off by semi-colons + +## Latin Dictionary + +```json +{ + "orth": "latin word", + "parts": ["first part", "second part", "third part", "fourth part"], + "senses": ["meaning 1", "meaning 2", "meaning 3"], + "pos": "part of speech", + "form": "form of the word", + "info": { + "age": "when it was used", + "area": "the field in which it was used", + "geo": "the area in which it was used", + "freq": "frequency of use", + "source": "source for the word in the dictionary" + }, + "n": [0, 0], + "id": 0 +} +``` diff --git a/wiki/dictionary-keys.md b/wiki/dictionary-keys.md new file mode 100644 index 0000000..97a272e --- /dev/null +++ b/wiki/dictionary-keys.md @@ -0,0 +1,247 @@ +# Key Translations + +Without any modifiers, the output will show the raw data from the dictionary. This data is not easy to read, so it is translated into a more readable format. The following is a list of the translations that are used.
+ +## Comparisons + +| Key | Translation | +| :---- | :---------- | +| POS | Positive | +| COMP | Comparative | +| SUPER | Superlative | +| X | Unknown | + +## Declensions + +| Key | Translation | +| :-- | :---------- | +| NOM | Nominative | +| GEN | Genitive | +| DAT | Dative | +| ACC | Accusative | +| ABL | Ablative | +| VOC | Vocative | +| LOC | Locative | +| X | Unknown | + +## Gender + +| Key | Translation | +| :-- | :---------- | +| M | Masculine | +| F | Feminine | +| N | Neuter | +| C | Common | +| X | Unknown | + +## Mood + +| Key | Translation | +| :-- | :---------- | +| IND | Indicative | +| IMP | Imperative | +| SUB | Subjunctive | +| INF | Infinitive | +| X | Unknown | + +## Noun + +| Key | Translation | +| :-- | :---------------------- | +| S | Singular | +| P | Plural / Multiple | +| A | Abstract Idea | +| G | Group Name (ex> Romans) | +| N | Proper Name | +| P | Person | +| T | Thing | +| L | Location | +| W | Place (Where) | +| X | Unknown | + +## Numeral + +| Key | Translation | +| :--- | :----------- | +| CARD | Cardinal | +| ORD | Ordinal | +| Dist | Distributive | +| X | Unknown | + +## Number + +| Key | Translation | +| :-- | :---------- | +| S | Singular | +| P | Plural | +| X | Unknown | + +## Part Of Speech + +| Key | Translation | +| :----- | :------------------------ | +| N | Noun | +| V | Verb | +| VPAR | Verb Participle | +| ADJ | Adjective | +| PREP | Preposition | +| PRON | Pronoun | +| INTERJ | Interjection | +| NUM | Numeral | +| CONJ | Conjunction | +| ADV | Adverb | +| INT | Number | +| SUPINE | Supine | +| PACK | Packon (dictionary code ) | +| TACKON | Tackon (dictionary code ) | +| PREFIC | Prefix | +| SUFFIX | Suffix | + +## Pronoun + +| Key | Translation | +| :----- | :------------ | +| PERS | Personal | +| DEMONS | Demonstrative | +| REL | Relative | +| INTERR | Interrogative | +| INDEF | Indefinite | +| REFLEX | Reflexive | +| ADJECT | Adjective | +| X | Unknown | + +## Tense + +| Key | Translation | +| :--- | :------------- 
| +| PRES | Present | +| IMPF | Imperfect | +| FUT | Future | +| PERF | Perfect | +| PLUP | Pluperfect | +| FUTP | Future Perfect | +| INF | Infinitive | +| X | Unknown | + +## Verb + +| Key | Translation | +| :------- | :----------------------- | +| TO_BE | to be | +| TO_BEING | to being | +| GEN | takes genitive | +| DAT | takes dative | +| ABL | takes ablative | +| TRANS | transitive | +| INTRANS | intransitive | +| IMPERS | impersonal (it/they/god) | +| DEP | deponent | +| SEMIDEP | semi-deponent | +| PERFDEF | perfect definite | +| X | Unknown | + +## Voice + +| Key | Translation | +| :------ | :---------- | +| ACTIVE | Active | +| PASSIVE | Passive | +| X | Unknown | + +## Age + +| Key | Translation | +| :-- | :------------------- | +| A | Archaic | +| B | Early | +| C | Classical | +| D | Late | +| E | Later | +| F | Medieval | +| G | Scholar | +| H | Modern | +| X | Used throughout ages | + +## Area + +| Key | Translation | +| :-- | :----------- | +| A | Agriculture | +| B | Biological | +| D | Art | +| E | Religious | +| G | Grammar | +| L | Legal | +| P | Poetic | +| S | Scientific | +| T | Technical | +| W | Warfare | +| Y | Mythological | +| X | All or None | + +## Geography + +| Key | Translation | +| :-- | :------------- | +| A | Africa | +| B | Britain | +| C | China | +| D | Scandinavia | +| E | Egypt | +| F | France / Gaul | +| G | Germany | +| H | Greece | +| I | Italy / Rome | +| J | India | +| K | Balkans | +| N | Netherlands | +| P | Persia | +| Q | Near East | +| R | Russia | +| S | Spain / Iberia | +| U | Eastern Europe | +| X | All or None | + +## Frequency + +| Key | Translation | +| :-- | :------------------------------------ | +| A | Very Frequent | +| B | Frequent | +| C | Common | +| D | Lesser | +| E | Uncommon | +| F | Very Rare | +| I | Inscription | +| M | Graffiti | +| N | Pliny (only in Pliny Natural History) | +| X | All or None | + +## Source + +| Key | Translation | +| :-- | 
:------------------------------------------------------------- | +| B | C.H.Beeson, A Primer of Medieval Latin, 1925 (Bee) | +| C | Charles Beard, Cassell's Latin Dictionary 1892 (CAS) | +| D | J.N.Adams, Latin Sexual Vocabulary, 1982 (Sex) | +| E | L.F.Stelten, Dictionary of Eccles. Latin, 1995 (Ecc) | +| F | Roy J. Deferrari, Dictionary of St. Thomas Aquinas, 1960 (DeF) | +| G | Gildersleeve + Lodge, Latin Grammar 1895 (G+L) | +| H | Collatinus Dictionary by Yves Ouvrard | +| I | Leverett, F.P., Lexicon of the Latin Language, Boston 1845 | +| K | Calepinus Novus, modern Latin, by Guy Licoppe (Cal) | +| L | Lewis, C.S., Elementary Latin Dictionary 1891 | +| M | Latham, Revised Medieval Word List, 1980 | +| N | Lynn Nelson, Wordlist | +| O | Oxford Latin Dictionary, 1982 (OLD) | +| P | Souter, A Glossary of Later Latin to 600 A.D., Oxford 1949 | +| Q | Other, cited or unspecified dictionaries | +| R | Plater & White, A Grammar of the Vulgate, Oxford 1926 | +| S | Lewis and Short, A Latin Dictionary, 1879 (L+S) | +| T | Found in a translation -- no dictionary reference | +| U | Du Cange | +| V | Vademecum in opus Saxonis - Franz Blatt (Saxo) | +| W | My personal guess | +| Y | Temp special code | +| Z | Sent by user -- no dictionary reference | +| X | general | diff --git a/wiki/output-structure.md b/wiki/output-structure.md new file mode 100644 index 0000000..12546b8 --- /dev/null +++ b/wiki/output-structure.md @@ -0,0 +1,188 @@ +# Output Structure + +Vocab Vault has two output formats: `json` and a custom `pretty` format like the one used in Whitaker's Words. + +## JSON + +JSON is the default output format. It is a simple format that is easy to parse and use in other programs. By default, all fields are present, but adding the `-c` or `--clean` flag will remove all fields that are not present in the entry.
+ +### Latin to English + +```json +[ + { + "word": "searched word", + "definitions": [ + { + "tricks": [], // a list of modifications applied to the searched word + "word": { + "orth": "word", + "parts": ["first part", "second part", "third part", "fourth part"], + "senses": ["meaning 1", "meaning 2", "meaning 3"], + "pos": "part of speech", + "form": { + // note that in clean format, not all of these fields are present + "declension": "declension of the word", + "number": "singular / plural", + "gender": "gender of the word", + "tense": "tense of the word", + "voice": "voice that the word is used with", + "mood": "the mood of the word", + "verb": "the kind of verb that the word is", + "kind": "the kind of word that it is", + "person": 0 // 1st, 2nd, 3rd person + }, + "info": { + "age": "when it was used", + "area": "the field in which it was used", + "geo": "the area in which it was used", + "freq": "frequency of use", + "source": "source for the word in the dictionary" + }, + "n": [0, 0], + "modifiers": [ + // prefixes and suffixes or other modifiers + { + "pos": "part of speech", + "senses": ["meaning 1", "meaning 2", "meaning 3"], + "orth": "modifier word", + "modifier": "type of modifier" + } + ], + "id": 0, // id of the latin word + "extension_senses": ["other meanings of the word"] + }, + "stem": { + "pos": "noun", + "form": { + "declension": "2nd declension", + "number": "", + "gender": "masculine", + "tense": "", + "voice": "", + "mood": "", + "verb": "", + "kind": "person", + "person": 0 + }, + "orth": "discipul", + "n": [0, 0], + "wid": 18070 + }, + "inflections": [ + // a list of all possible inflections for the word + { + "ending": "i", + "pos": "noun", + "note": "", + "n": [2, 1], + "form": { + "declension": "genitive", + "number": "singular", + "gender": "unknown", + "tense": "", + "voice": "", + "mood": "", + "verb": "", + "kind": "", + "person": 0 + } + } + ], + "addon": "" + } + ] + } +] +``` + +### English to Latin + +```json +[ + { +
"word": "searched word", + "definitions": [ + { + "word": { + "orth": "word", + "wid": 0, // id of the latin word + "pos": "part of speech", + "frequency_type": "very frequent", + "true_frequency": 0, // frequency + compound - semi + "frequency": 0, // frequency in the Latin language + "compound": 0, + "semi": 0 + }, + "translation": { + "orth": "latin word", + "parts": ["first part", "second part", "third part", "fourth part"], + "senses": ["meaning 1", "meaning 2", "meaning 3"], + "pos": "part of speech", + "form": { + // note that in clean format, not all of these fields are present + "declension": "declension of the word", + "number": "singular / plural", + "gender": "gender of the word", + "tense": "tense of the word", + "voice": "voice that the word is used with", + "mood": "the mood of the word", + "verb": "the kind of verb that the word is", + "kind": "the kind of word that it is", + "person": 0 // 1st, 2nd, 3rd person + }, + "info": { + "age": "when it was used", + "area": "the field in which it was used", + "geo": "the area in which it was used", + "freq": "frequency of use", + "source": "source for the word in the dictionary" + }, + "n": [ + // information on word type + 0, 0 + ], + "id": 0 // id of the latin word + } + } + ] + } +] +``` + +## Pretty Output + +The pretty output format is a custom format that is designed to be easy to read and use. It is designed to be similar to the output format used in Whitaker's Words. To use this format, add the `-p` or `--pretty` flag. The pretty format is designed to be used with the `-c` or `--clean` flag. Some information is hidden in this format; to make it visible, add the `-d` or `--detailed` flag.
+ +### Format + +```text +searched word + +word +part of speech +information on word type +age: when it was used | area: the field in which it was used | geo: the area in which it was used | freq: frequency of use | source: source for the word in the dictionary // only present if -d flag is used +meaning 1 | meaning 2 | meaning 3 + +word +part of speech +information on word type +age: when it was used | area: the field in which it was used | geo: the area in which it was used | freq: frequency of use | source: source for the word in the dictionary // only present if -d flag is used +meaning 1 | meaning 2 | meaning 3 + +word +part of speech +information on word type +age: when it was used | area: the field in which it was used | geo: the area in which it was used | freq: frequency of use | source: source for the word in the dictionary // only present if -d flag is used +meaning 1 | meaning 2 | meaning 3 + +--------------------------------- +second searched word + +word +part of speech +information on word type +age: when it was used | area: the field in which it was used | geo: the area in which it was used | freq: frequency of use | source: source for the word in the dictionary // only present if -d flag is used +meaning 1 | meaning 2 | meaning 3 +```