-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
python is finished, hopefully, can get back to emacs lisp
- Loading branch information
Showing
1 changed file
with
173 additions
and
63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,110 +1,220 @@ | ||
from typing import Optional, List, Dict, Type | ||
import json | ||
|
||
import wn | ||
from simplemma.simplemma import lemmatize | ||
|
||
de = wn.Wordnet("odenet") | ||
|
||
import json | ||
|
||
|
||
def woerterbuch_definitions_and_synonyms(word, part_of_speech=False): | ||
def get_word_objects(word: str, wordnet: wn.Wordnet) -> Optional[List[wn.Word]]: | ||
""" | ||
Get definitions and synonyms for a given word. | ||
Get a list of Word objects for the given word. | ||
Args: | ||
word (str): The word to lookup. | ||
part_of_speech (str or False, optional): If provided, filter results | ||
to show only words with the specified part of speech. Supported | ||
values are 'n' for noun, 'a' for adjective/adverb, and 'v' for verb. | ||
Defaults to False. | ||
word (str): The word to look up. | ||
wordnet (Wordnet): The Wordnet object for querying. | ||
Returns: | ||
str: A JSON-formatted string containing definitions and synonyms | ||
of the word. If multiple parts of speech are found it returns a list with | ||
the parts of speech possibilities. If no words are found it returns False. | ||
list: A list of Word objects or None. | ||
""" | ||
word_objects = wordnet.words(word) | ||
if word_objects: | ||
return word_objects | ||
else: | ||
return None | ||
|
||
Example: | ||
woerterbuch_definitions_and_synonyms('draussen') | ||
woerterbuch_definitions_and_synonyms('orange', 'a') | ||
|
||
def lemmatize_word(word: str, lang: str) -> Optional[str]: | ||
""" | ||
words = de.words(word) | ||
word_object = False | ||
Lemmatize a word using the specified language. | ||
# If no word was found get the lemma and try again. | ||
if not words: | ||
lemma = lemmatize(word, lang="de") | ||
if lemma != word: | ||
words = de.words(lemma) | ||
if not words: | ||
return False | ||
|
||
# If the array holds more than one word, return the possibilities. | ||
# If the optional argument pos is provided select the part of speech | ||
# accordingly. | ||
if len(words) > 1: | ||
parts_of_speech = ["parts of speech"] | ||
for w in words: | ||
pos = w.pos | ||
if part_of_speech and part_of_speech == pos: | ||
word_object = w | ||
break | ||
else: | ||
parts_of_speech.append(pos) | ||
if not word_object: | ||
return json.dumps(parts_of_speech, ensure_ascii=False) | ||
Args: | ||
word (str): The word to lemmatize. | ||
lang (str, optional): The language for lemmatization. | ||
Returns: | ||
str: The lemmatized form of the word or None. | ||
""" | ||
lemma = lemmatize(word, lang=lang) | ||
if lemma != word: | ||
return lemma | ||
else: | ||
word_object = words[0] | ||
return None | ||
|
||
def filter_by_part_of_speech(
    word_objects: List[wn.Word],
    part_of_speech: str,
) -> Optional[wn.Word]:
    """
    Return the first Word object whose part of speech matches.

    Args:
        word_objects (List[wn.Word]): A list of Word objects; each one is
            only read through its ``pos`` attribute.
        part_of_speech (str): The desired part of speech, compared for
            equality against ``pos``.

    Returns:
        Optional[wn.Word]: The first word object matching the specified part
        of speech, or None if no match is found or ``part_of_speech`` is
        empty.
    """
    # An empty filter can never match, so decide that once up front instead
    # of re-testing it for every candidate.
    if not part_of_speech:
        return None
    return next(
        (
            word_object
            for word_object in word_objects
            if part_of_speech == word_object.pos
        ),
        None,
    )
|
||
def get_parts_of_speech(word_objects: List[wn.Word]) -> Optional[str]:
    """
    Get a JSON string of parts of speech from a list of Word objects.

    Args:
        word_objects (List[wn.Word]): A list of Word objects; each one is
            only read through its ``pos`` attribute.

    Returns:
        Optional[str]: A JSON array (as a string) holding the ``pos`` of every
        Word object, in input order and without de-duplication. Returns None
        if ``word_objects`` is empty.
    """
    parts_of_speech = [word_object.pos for word_object in word_objects]
    if not parts_of_speech:
        return None
    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    return json.dumps(parts_of_speech, ensure_ascii=False)
|
||
|
||
def get_definitions_and_synonyms(word_object: wn.Word) -> List[Dict[str, List[str]]]:
    """
    Get definitions and synonyms for a Word object.

    Args:
        word_object (Word): A Word object; it is read through its ``lemma()``
            and ``synsets()`` methods.

    Returns:
        list: One dictionary per synset, in the order ``synsets()`` yields
        them. Each dictionary has the keys ``"definition"`` (the value of
        ``synset.definition()``) and ``"synonyms"`` (the synset's lemmas
        with the word itself left out).
    """
    # The word's own lemma is the same for every synset, so look it up once.
    word_lemma = word_object.lemma()

    definitions_and_synonyms = []
    # A synset is a group of words with the same definition.
    for synset in word_object.synsets():
        definition = synset.definition()
        # Build a filtered copy instead of calling list.remove(): this leaves
        # the list returned by lemmas() untouched and drops every occurrence
        # of the word itself, not only the first one.
        synonyms = [lemma for lemma in synset.lemmas() if lemma != word_lemma]
        definitions_and_synonyms.append(
            {"definition": definition, "synonyms": synonyms}
        )
    return definitions_and_synonyms
|
||
# Add the definition and its synonyms (as children). | ||
definitions_and_synonyms.append([definition, synonyms]) | ||
output = {"word": word_object.lemma(), "definitions": definitions_and_synonyms} | ||
return json.dumps(output, ensure_ascii=False) | ||
def woerterbuch_definitions_and_synonyms(word: str, part_of_speech: str = None, wordnet_name: str = "odenet", lemma_lang: str = "de"): | ||
""" | ||
Get definitions and synonyms for a given word. | ||
def get_definitions(word): | ||
word = de.words(word)[0] | ||
synsets = word.synsets() | ||
definitions = [] | ||
for synset in synsets: | ||
definitions.append(synset.definition()) | ||
return definitions | ||
Args: | ||
word (str): The word to look up. | ||
part_of_speech (str or None, optional): If provided, filter results | ||
to show only words with the specified part of speech. Supported | ||
values are 'n' for noun, 'a' for adjective/adverb, and 'v' for verb. | ||
Defaults to None. | ||
wordnet_name (str, optional): The name of the WordNet database to use. | ||
Defaults to "odenet". | ||
lemma_lang (str, optional): The language for lemmatization. Defaults to "de". | ||
Returns: | ||
str: A JSON-formatted string containing definitions and synonyms | ||
of the word. If multiple parts of speech are found, it returns a list with | ||
the possible parts of speech. If no words are found, it returns None. | ||
Example: | ||
>>> woerterbuch_definitions_and_synonyms('draussen') | ||
>>> woerterbuch_definitions_and_synonyms('orange', 'a') | ||
""" | ||
|
||
wordnet: wn.Wordnet = wn.Wordnet(wordnet_name) | ||
word_objects: Optional[List[wn.Word]] = None | ||
word_object: Optional[wn.Word] = None | ||
word_lemma: Optional[str] = None | ||
|
||
word_objects = get_word_objects(word, wordnet) | ||
|
||
# If no word was found get the lemma and try again. | ||
if not word_objects: | ||
word_lemma = lemmatize_word(word, lemma_lang) | ||
if word_lemma: | ||
word_objects = get_word_objects(word_lemma, wordnet) | ||
if not word_objects: | ||
return None | ||
|
||
# If multiple objects are found then return the parts of speech to choose | ||
# from. | ||
if len(word_objects) > 1: | ||
# If part of speech is set filter by it. | ||
if part_of_speech: | ||
word_object = filter_by_part_of_speech(word_objects, part_of_speech) | ||
if not word_object: | ||
return None | ||
# Otherwise return a list of the parts of speech. | ||
else: | ||
return get_parts_of_speech(word_objects) | ||
else: | ||
word_object = word_objects[0] | ||
|
||
definitions_and_synonyms = get_definitions_and_synonyms(word_object) | ||
|
||
if definitions_and_synonyms: | ||
# The German used in Switzerland doesn't use the Eszett. During | ||
# lemmatization a double s might be converted to an Eszett. In this case the | ||
# lemma should be None. | ||
if word_lemma and word == word_lemma.replace("ß", "ss"): | ||
word_lemma = None | ||
# The return value includes the word used as parameter, the lemma (which might | ||
# differ from the parameter) and the definitions with the synonyms for each | ||
# definition. | ||
output = { | ||
"word param": word, | ||
"word lemma": word_lemma, | ||
"definitions": definitions_and_synonyms, | ||
} | ||
return json.dumps(output, ensure_ascii=False) | ||
else: | ||
return None | ||
|
||
def process_command(command: str, args: str) -> Optional[str]: | ||
""" | ||
Process a command and its arguments using registered functions. | ||
# TODO | ||
def get_synonyms(word): | ||
# lemmas = sorted(lemmas, key=str.casefold) | ||
return word | ||
Args: | ||
command (str): The command to be executed. | ||
args (str): The comma-separated arguments for the command. | ||
Returns: | ||
str: The result of the executed command or an "Invalid command" message. | ||
The command returns a string or None. | ||
""" | ||
|
||
def process_command(command, args): | ||
functions = { | ||
"woerterbuch_definitions_and_synonyms": woerterbuch_definitions_and_synonyms, | ||
"get_synonyms": get_synonyms, | ||
"get_definitions": get_definitions | ||
# "get_synonyms": get_synonyms, | ||
# "get_definitions": get_definitions, | ||
} | ||
|
||
if command in functions: | ||
func = functions[command] | ||
parsed_args = [arg.strip() for arg in args.split(",")] | ||
|
||
return func(*parsed_args) | ||
else: | ||
return "Invalid command" | ||
|
||
|
||
while True: | ||
# Wait for input from the user (read a string from standard input) | ||
line = input() | ||
|
||
# Split the user input into command and arguments using the comma as a separator. | ||
command, args = line.split(",", 1) | ||
|
||
# Call the 'process_command' function with the extracted command and arguments. | ||
result = process_command(command, args) | ||
print(result) | ||
|
||
# Print the result of the executed command or an error message. | ||
print(result) |