Skip to content

Commit

Permalink
python is finished, hopefully, can get back to emacs lisp
Browse files Browse the repository at this point in the history
  • Loading branch information
hubisan committed Aug 15, 2023
1 parent aacb48f commit 81fa133
Showing 1 changed file with 173 additions and 63 deletions.
236 changes: 173 additions & 63 deletions init.py
Original file line number Diff line number Diff line change
@@ -1,110 +1,220 @@
from typing import Optional, List, Dict, Type
import json

import wn
from simplemma.simplemma import lemmatize

de = wn.Wordnet("odenet")

import json


def woerterbuch_definitions_and_synonyms(word, part_of_speech=False):
def get_word_objects(word: str, wordnet: wn.Wordnet) -> Optional[List[wn.Word]]:
    """
    Get a list of Word objects for the given word.

    Args:
        word (str): The word to look up.
        wordnet (Wordnet): The Wordnet object for querying.

    Returns:
        list: The Word objects returned by ``wordnet.words(word)``, or None
            if that lookup returned nothing.
    """
    # Collapse an empty result to None so callers only have to handle a
    # single "nothing found" value.
    return wordnet.words(word) or None

Example:
woerterbuch_definitions_and_synonyms('draussen')
woerterbuch_definitions_and_synonyms('orange', 'a')

def lemmatize_word(word: str, lang: str) -> Optional[str]:
    """
    Lemmatize a word using the specified language.

    Args:
        word (str): The word to lemmatize.
        lang (str): The language passed on to the lemmatizer.

    Returns:
        str: The lemmatized form of the word, or None if the lemma is
            identical to the word that was passed in.
    """
    lemma = lemmatize(word, lang=lang)
    # A lemma equal to the input adds nothing new, so it is reported as None.
    return lemma if lemma != word else None

def filter_by_part_of_speech(
    word_objects: List[wn.Word],
    part_of_speech: str,
) -> Optional[wn.Word]:
    """
    Filter a list of Word objects by a specific part of speech.

    Args:
        word_objects (List[wn.Word]): A list of Word objects.
        part_of_speech (str): The desired part of speech.

    Returns:
        Optional[wn.Word]: The first word object whose ``pos`` equals the
            specified part of speech, or None if no match is found or no
            part of speech was given.
    """
    # An empty/None part of speech can never match; decide that once instead
    # of re-checking it for every candidate.
    if not part_of_speech:
        return None
    return next(
        (
            word_object
            for word_object in word_objects
            if part_of_speech == word_object.pos
        ),
        None,
    )

def get_parts_of_speech(word_objects: List[wn.Word]) -> Optional[str]:
    """
    Get a JSON string of parts of speech from a list of Word objects.

    Args:
        word_objects (List[wn.Word]): A list of Word objects.

    Returns:
        Optional[str]: A JSON string containing the ``pos`` value of every
            Word object, in input order. Returns None if the list of parts
            of speech is empty.
    """
    parts_of_speech = [word_object.pos for word_object in word_objects]
    if not parts_of_speech:
        return None
    return json.dumps(parts_of_speech, ensure_ascii=False)


def get_definitions_and_synonyms(word_object: wn.Word) -> List[Dict[str, List[str]]]:
    """
    Get definitions and synonyms for a Word object.

    Args:
        word_object (Word): A Word object.

    Returns:
        list: One dictionary per synset of the word, each with the keys
            "definition" (the synset's definition) and "synonyms" (the
            synset's lemmas without the word's own lemma).
    """
    # The word's own lemma is the same for every synset, so fetch it once.
    word_lemma = word_object.lemma()

    definitions_and_synonyms = []
    # A synset is a group of words sharing the same definition.
    for synset in word_object.synsets():
        # Build a filtered copy rather than calling .remove() so the list
        # handed out by the synset is never modified in place.
        synonyms = [lemma for lemma in synset.lemmas() if lemma != word_lemma]
        definitions_and_synonyms.append(
            {"definition": synset.definition(), "synonyms": synonyms}
        )
    return definitions_and_synonyms

# Add the definition and its synonyms (as children).
definitions_and_synonyms.append([definition, synonyms])
output = {"word": word_object.lemma(), "definitions": definitions_and_synonyms}
return json.dumps(output, ensure_ascii=False)
def woerterbuch_definitions_and_synonyms(
    word: str,
    part_of_speech: Optional[str] = None,
    wordnet_name: str = "odenet",
    lemma_lang: str = "de",
) -> Optional[str]:
    """
    Get definitions and synonyms for a given word.

    Args:
        word (str): The word to look up.
        part_of_speech (str or None, optional): If provided, filter results
            to show only words with the specified part of speech. Supported
            values are 'n' for noun, 'a' for adjective/adverb, and 'v' for
            verb. Defaults to None.
        wordnet_name (str, optional): The name of the WordNet database to use.
            Defaults to "odenet".
        lemma_lang (str, optional): The language for lemmatization. Defaults
            to "de".

    Returns:
        str: A JSON-formatted string containing definitions and synonyms
            of the word. If multiple parts of speech are found and none was
            requested, it returns a JSON list with the possible parts of
            speech. If no words or no definitions are found, it returns None.

    Example:
        >>> woerterbuch_definitions_and_synonyms('draussen')
        >>> woerterbuch_definitions_and_synonyms('orange', 'a')
    """
    wordnet = wn.Wordnet(wordnet_name)
    word_lemma: Optional[str] = None

    word_objects = get_word_objects(word, wordnet)

    # If no word was found get the lemma and try again.
    if not word_objects:
        word_lemma = lemmatize_word(word, lemma_lang)
        if word_lemma:
            word_objects = get_word_objects(word_lemma, wordnet)
        if not word_objects:
            return None

    if len(word_objects) == 1:
        word_object = word_objects[0]
    elif part_of_speech:
        # Several candidates: narrow them down by the requested part of speech.
        word_object = filter_by_part_of_speech(word_objects, part_of_speech)
        if not word_object:
            return None
    else:
        # Several candidates and nothing to choose by: return the parts of
        # speech so the caller can pick one and ask again.
        return get_parts_of_speech(word_objects)

    definitions_and_synonyms = get_definitions_and_synonyms(word_object)
    if not definitions_and_synonyms:
        return None

    # The German used in Switzerland doesn't use the Eszett. During
    # lemmatization a double s might be converted to an Eszett. In this case
    # the lemma is not reported as a distinct form.
    if word_lemma and word == word_lemma.replace("ß", "ss"):
        word_lemma = None

    # The return value includes the word used as parameter, the lemma (might
    # be different from the parameter) and the definitions with the synonyms
    # for each definition.
    output = {
        "word param": word,
        "word lemma": word_lemma,
        "definitions": definitions_and_synonyms,
    }
    return json.dumps(output, ensure_ascii=False)

def process_command(command: str, args: str) -> Optional[str]:
    """
    Process a command and its arguments using registered functions.

    Args:
        command (str): The command to be executed.
        args (str): The comma-separated arguments for the command.

    Returns:
        str: The result of the executed command (a string or None), or the
            message "Invalid command" if the command is not registered.
    """
    # Only functions that are actually defined may be registered here; a
    # reference to a missing name would raise NameError on every call.
    functions = {
        "woerterbuch_definitions_and_synonyms": woerterbuch_definitions_and_synonyms,
    }

    func = functions.get(command)
    if func is None:
        return "Invalid command"

    # Arguments arrive as one comma-separated string; surrounding whitespace
    # is not significant.
    parsed_args = [arg.strip() for arg in args.split(",")]
    return func(*parsed_args)


while True:
    # Wait for input from the user (read one line from standard input).
    try:
        line = input()
    except EOFError:
        # Standard input was closed: stop reading instead of ending with a
        # traceback.
        break

    # Split the input into command and arguments at the first comma.
    # partition() never raises, so a line without a comma yields the whole
    # line as the command and an empty argument string.
    command, _, args = line.partition(",")

    # Call 'process_command' with the extracted command and arguments and
    # print the result of the executed command or an error message, once.
    result = process_command(command, args)
    print(result)

0 comments on commit 81fa133

Please sign in to comment.