diff --git a/README.org b/README.org index a973839..ff6cd8b 100644 --- a/README.org +++ b/README.org @@ -93,7 +93,16 @@ If a word is not in its baseform, the synonyms for the baseform are looked up (t Set the following variables to change the behaviour of the package: - ~woerterbuch-org-buffer-display-function~ #'pop-to-buffer \\ - Function used to the display the org buffer with the definitions or synonyms. The function takes buffer as argument. + Function used to display the org buffer with the definitions or synonyms. The function takes buffer as argument. There is also a function provided to show it in a side window: + #+BEGIN_SRC emacs-lisp + ;; Set the variable: + (setq woerterbuch-org-buffer-display-function + (apply-partially #'woerterbuch--display-in-side-window 'right nil nil)) + ;; Or use it in a let binding: + (let* ((woerterbuch-org-buffer-display-function + (apply-partially #'woerterbuch--display-in-side-window 'right nil nil))) + (woerterbuch-synonyms-show-in-org-buffer)) + #+END_SRC - ~woerterbuch-list-bullet-point~ "-" \\ String to use as list bullet point when converting synonyms or definitions to a list. - ~woerterbuch-insert-org-heading-format~ "%s %s\n\n%s" \\ @@ -130,3 +139,17 @@ See the [[./CHANGELOG.org][changelog]]. Use the issue tracker to reports bugs, suggest improvements or propose new features. If you want to contribute please open a pull request after having opened a new issue. In any case please check out the [[./CONTRIBUTING.org::*Contributing][contributing guidelines]] beforehand. + +** Remarks + +*** Synonyms + +**** Openthesaurus + +The text returned can contain additional information in parentheses. Examples: + +- aufsetzen (Schreiben, Kaufvertrag, ...) +- errichten (Testament, Patientenverfügung, ...) +- (die) Probe aufs Exempel + +This information is removed when reading from the minibuffer. 
Else it is not removed and inserted into the buffer diff --git a/test.py b/test.py deleted file mode 100644 index 7214dc6..0000000 --- a/test.py +++ /dev/null @@ -1,18 +0,0 @@ -def greet_person(person_name): - return f"Hello, {person_name}!" - -def process_command(command, args): - functions = { - "greet": greet_person - } - - if command in functions: - return functions[command](args) - else: - return "Invalid command" - -while True: - line = input() - command, args = line.split(",", 1) - result = process_command(command, args) - print(result) diff --git a/woerterbuch.el b/woerterbuch.el index 9a89379..49c668f 100644 --- a/woerterbuch.el +++ b/woerterbuch.el @@ -65,30 +65,30 @@ ;; nur einfach der Text verwendet werden, ohne die einzelnen Synonyme zu ;; extrahieren. -(with-current-buffer (url-retrieve-synchronously "https://de.wiktionary.org/wiki/lassen") - (set-buffer-multibyte t) - (let* ((start (1+ (re-search-forward "\\(>Synonyme:

\\|>Sinnverwandte Wörter:

\\)"))) - (end (search-forward "")) - (dom (libxml-parse-html-region start end)) - (text (dom-texts dom)) - ;; Change the leading [1] to - for org-mode. - (text-cleaned (replace-regexp-in-string "\\[[^]]+]" "-" text)) - ;; Replace spaces with one space. - (text-cleaned (replace-regexp-in-string " +" " " text-cleaned)) - ;; Remove space before punctuation. - (text-cleaned (replace-regexp-in-string "\\( \\)[,:;.]" "" text-cleaned nil nil 1)) - ;; Remove space at end of line. - (text-cleaned (replace-regexp-in-string " $" "" text-cleaned)) - ;; Remove remarks with Siehe auch - (text-cleaned (replace-regexp-in-string "\\(; siehe auch:.*;\\|; siehe auch:.*$\\)" "" text-cleaned)) - ;; Second line and following have a space at the beginning. - (text-cleaned (replace-regexp-in-string "^ -" "-" text-cleaned)) - ;; Add spaces at the beginning if not starting with -. - (text-cleaned (replace-regexp-in-string "^[^-]" " " text-cleaned)) - ) - (kill-buffer) - text-cleaned - )) +;; (with-current-buffer (url-retrieve-synchronously "https://de.wiktionary.org/wiki/lassen") +;; (set-buffer-multibyte t) +;; (let* ((start (1+ (re-search-forward "\\(>Synonyme:

\\|>Sinnverwandte Wörter:

\\)"))) +;; (end (search-forward "")) +;; (dom (libxml-parse-html-region start end)) +;; (text (dom-texts dom)) +;; ;; Change the leading [1] to - for org-mode. +;; (text-cleaned (replace-regexp-in-string "\\[[^]]+]" "-" text)) +;; ;; Replace spaces with one space. +;; (text-cleaned (replace-regexp-in-string " +" " " text-cleaned)) +;; ;; Remove space before punctuation. +;; (text-cleaned (replace-regexp-in-string "\\( \\)[,:;.]" "" text-cleaned nil nil 1)) +;; ;; Remove space at end of line. +;; (text-cleaned (replace-regexp-in-string " $" "" text-cleaned)) +;; ;; Remove remarks with Siehe auch +;; (text-cleaned (replace-regexp-in-string "\\(; siehe auch:.*;\\|; siehe auch:.*$\\)" "" text-cleaned)) +;; ;; Second line and following have a space at the beginning. +;; (text-cleaned (replace-regexp-in-string "^ -" "-" text-cleaned)) +;; ;; Add spaces at the beginning if not starting with -. +;; (text-cleaned (replace-regexp-in-string "^[^-]" " " text-cleaned)) +;; ) +;; (kill-buffer) +;; text-cleaned +;; )) ;;; Requirements @@ -111,7 +111,9 @@ (defcustom woerterbuch-org-buffer-display-function #'pop-to-buffer "Function used to the display the org buffer with the definitions or synonyms. -The function takes buffer as argument." +The function takes buffer as argument. +The function `woerterbuch--display-in-side-window' may be used to show the org +buffer in a side window. Use with `apply-partially' to set the side." :type 'function) (defcustom woerterbuch-list-bullet-point "-" @@ -233,73 +235,18 @@ Returns a cons cell with the car being the word and cdr the bounds." 
(insert-file-contents path) (buffer-string))) -;;; Python Process - -(defvar woerterbuch--process nil - "The process running python.") - -(defconst woerterbuch--process-buffer-name "*woerterbuch-process*" - "Name to use for the process buffer.") - -;; (defvar woerterbuch--process-start-timeout 30 -;; "Number of seconds program waits for the definition of the python functions.") - -(defvar woerterbuch--process-python-init-path - (expand-file-name "woerterbuch.py" woerterbuch--package-directory) - "Path to the file that holds the python init code.") - -(defvar woerterbuch--process-output nil - "Capture the output of the process.") - -(defun woerterbuch--process-filter (_process output) - "Function called from the process. -It stores the output in `woerterbuch--process-output'." - (setq woerterbuch--process-output output)) - -(defun woerterbuch--process-start (&optional restart) - "Start and return the process runing python. -Loads the modules needed and defines the functions and variables. -If RESTART is non-nil then kill the process and start it again." 
- (when (and restart (process-live-p woerterbuch--process)) - (when-let* ((buffer-name woerterbuch--process-buffer-name) - (buffer (get-buffer buffer-name))) - (kill-buffer buffer)) - ;; Hope this will never result in an endless loop - (kill-process woerterbuch--process) - (while (process-live-p woerterbuch--process))) - (if (process-live-p woerterbuch--process) - woerterbuch--process - (let* ((process-connection-type nil) ; use a pipe - (coding-system-for-write 'utf-8-auto) - (coding-system-for-read 'utf-8-auto) - (path woerterbuch--process-python-init-path) - (process-buffer-name woerterbuch--process-buffer-name) - (process-buffer (get-buffer-create process-buffer-name)) - (process (start-process "woerterbuch python" process-buffer - woerterbuch-process-python-programm - "-u" path))) - (setq woerterbuch--process process)))) - -(defun woerterbuch--process-capture-output (code) - "Run CODE in the python process and capture it's output." - (let* ((process woerterbuch--process)) - (unwind-protect - (progn - (setq woerterbuch--process-output nil) - (set-process-filter process #'woerterbuch--process-filter) - ;; `accept-process-output' can be used to wait for the process output. - ;; Else it doesn't wait and the filter function will be called later on. - ;; Use a higher timeout as it can take a while to load the modules. - (unless (accept-process-output (process-send-string process code) - woerterbuch-process-timeout) - (error "Timeout reached before output was received")) - (when woerterbuch--process-output - (pcase woerterbuch--process-output - ("None\n" nil) - ("Invalid command\n" - (error "%s (%s)" "Python code is invalid" code)) - (output (json-parse-string output :object-type 'plist))))) - (set-process-filter process t)))) +(defun woerterbuch--display-in-side-window (side width height buffer) + "Display BUFFER in side window on SIDE specified and select it. +Specify WIDTH and HEIGHT or set em to nil to not change it manually." 
+ (let* ((alist (list (cons 'side side))) + (alist (if width + (append alist (list (cons 'window-width width))) + alist)) + (alist (if height + (append alist (list (cons 'window-height height))) + alist))) + (select-window + (display-buffer-in-side-window buffer alist)))) ;;; German Definitions @@ -556,7 +503,7 @@ If TO-KILL-RING is non-nil it is added to the kill ring instead." ;; TODO Some words sadly inlcude remarks in brackets. Example: ;; A synonym for erstellen is errichten (Testament, Patientenverfügung, ...). ;; Need to clean the synonyms by removing the text starting with ' ('. - ;; Regexp is probably: " (.*". Rather test it. + ;; Regexp is probably: " (.*)". Rather test it. ;; Hmm, it is only needed to clean when using a function to select and insert a ;; synonym. Else it is better to leave it as it is. Example: ;; - abfassen, erstellen, aufsetzen (Schreiben, Kaufvertrag, ...), errichten @@ -599,7 +546,20 @@ nil if synsets are not empty." (map-elt raw-synonyms :baseforms) (car-safe (seq-into (plist-get raw-synonyms :baseforms) 'list)))) -(defun woerterbuch--synonyms-retrieve-as-list (word) +(defun woerterbuch--synonyms-clean-text (synonyms) + "Clean the text of each synonym in the list of SYNONYMS. +Synonyms sometimes contains additional information in parentheses. That +information should be stripped when reading from minibuffer." + (mapcar + (lambda (synonyms-group) + (mapcar (lambda (synonym) + ;; Sometimes it has additional information in brackets for the + ;; synonym. + (replace-regexp-in-string " ?(.*?) ?" "" synonym)) + synonyms-group)) + synonyms)) + +(defun woerterbuch--synonyms-retrieve-as-list (word &optional clean) "Retrieve the synonyms for WORD as a list of lists. Each list consist of the synonyms for one meaning of the word. Returns a cons with car being the word and cdr the synonyms. The @@ -611,7 +571,10 @@ Returns nil if no synonyms are retrieved." ;; If a baseform was found use that to retrieve the synonyms. 
(when baseform (setq raw-synonyms (woerterbuch--synonyms-retrieve-raw baseform))) - (let ((synonyms (woerterbuch--synonyms-to-list raw-synonyms))) + (let* ((synonyms (woerterbuch--synonyms-to-list raw-synonyms)) + (synonyms (if clean + (woerterbuch--synonyms-clean-text synonyms) + synonyms))) (cons (or baseform word) synonyms)))) (defun woerterbuch--synonyms-to-string (synonyms) @@ -657,7 +620,7 @@ synonyms." (defun woerterbuch--synonyms-read-synonym (word) "Read a synonym for WORD in the minibuffer and return it. Returns nil if no synonym was selected." - (if-let ((word-and-synonyms (woerterbuch--synonyms-retrieve-as-list word)) + (if-let ((word-and-synonyms (woerterbuch--synonyms-retrieve-as-list word t)) (word-used (car-safe word-and-synonyms)) (synonyms (cdr-safe word-and-synonyms))) (when-let ((synonyms-flattened (apply #'append synonyms)) diff --git a/woerterbuch.py b/woerterbuch.py deleted file mode 100644 index 8b122d6..0000000 --- a/woerterbuch.py +++ /dev/null @@ -1,220 +0,0 @@ -from typing import Optional, List, Dict, Type -import json - -import wn -from simplemma.simplemma import lemmatize - -import json - - -def get_word_objects(word: str, wordnet: wn.Wordnet) -> Optional[List[wn.Word]]: - """ - Get a list of Word objects for the given word. - - Args: - word (str): The word to look up. - wordnet (Wordnet): The Wordnet object for querying. - - Returns: - list: A list of Word objects or None. - """ - word_objects = wordnet.words(word) - if word_objects: - return word_objects - else: - return None - - -def lemmatize_word(word: str, lang: str) -> Optional[str]: - """ - Lemmatize a word using the specified language. - - Args: - word (str): The word to lemmatize. - lang (str, optional): The language for lemmatization. - - Returns: - str: The lemmatized form of the word or None. 
- """ - lemma = lemmatize(word, lang=lang) - if lemma != word: - return lemma - else: - return None - -def filter_by_part_of_speech( word_objects: List[wn.Word], part_of_speech: str,) -> Optional[wn.Word]: - """ - Filter a list of Word objects by a specific part of speech. - - Args: - word_objects (List[wn.Word]): A list of Word objects. - part_of_speech (str): The desired part of speech. - - Returns: - Optional[wn.Word]: The first word object matching the specified part of - speech or None if no match is found. - """ - for word_object in word_objects: - pos = word_object.pos - if part_of_speech and part_of_speech == pos: - return word_object - return None - -def get_parts_of_speech(word_objects: List[wn.Word]) -> Optional[str]: - """ - Get a JSON string of parts of speech from a list of Word objects. - - Args: - word_objects (List[wn.Word]): A list of Word objects. - - Returns: - Optional[str]: A JSON string containing a list of parts of speech - extracted from the Word objects. Returns None if the parts of speech - list is empty. - """ - parts_of_speech = [] - for word_object in word_objects: - parts_of_speech.append(word_object.pos) - if parts_of_speech: - return json.dumps(parts_of_speech, ensure_ascii=False) - else: - return None - - -def get_definitions_and_synonyms(word_object: wn.Word) -> List[Dict[str, List[str]]]: - """ - Get definitions and synonyms for a Word object. - - Args: - word_object (Word): A Word object. - - Returns: - list: A list of dictionaries containing definitions and synonyms. - """ - synsets = word_object.synsets() - definitions_and_synonyms = [] - for synset in synsets: - definition = synset.definition() - synonyms = synset.lemmas() - # Remove the word itself. 
- word_lemma = word_object.lemma() - if word_lemma in synonyms: - synonyms.remove(word_lemma) - definitions_and_synonyms.append( - {"definition": definition, "synonyms": synonyms} - ) - return definitions_and_synonyms - -def woerterbuch_definitions_and_synonyms(word: str, part_of_speech: str = None, wordnet_name: str = "odenet", lemma_lang: str = "de"): - """ - Get definitions and synonyms for a given word. - - Args: - word (str): The word to look up. - part_of_speech (str or None, optional): If provided, filter results - to show only words with the specified part of speech. Supported - values are 'n' for noun, 'a' for adjective/adverb, and 'v' for verb. - Defaults to None. - wordnet_name (str, optional): The name of the WordNet database to use. - Defaults to "odenet". - lemma_lang (str, optional): The language for lemmatization. Defaults to "de". - - Returns: - str: A JSON-formatted string containing definitions and synonyms - of the word. If multiple parts of speech are found, it returns a list with - the possible parts of speech. If no words are found, it returns None. - - Example: - >>> woerterbuch_definitions_and_synonyms('draussen') - >>> woerterbuch_definitions_and_synonyms('orange', 'a') - """ - - wordnet: wn.Wordnet = wn.Wordnet(wordnet_name) - word_objects: Optional[List[wn.Word]] = None - word_object: Optional[wn.Word] = None - word_lemma: Optional[str] = None - - word_objects = get_word_objects(word, wordnet) - - # If no word was found get the lemma and try again. - if not word_objects: - word_lemma = lemmatize_word(word, lemma_lang) - if word_lemma: - word_objects = get_word_objects(word_lemma, wordnet) - if not word_objects: - return None - - # If multiple objects are found then return the parts of speech to choose - # from. - if len(word_objects) > 1: - # If part of speech is set filter by it. 
- if part_of_speech: - word_object = filter_by_part_of_speech(word_objects, part_of_speech) - if not word_object: - return None - # Else return a list of part of speeches. - else: - return get_parts_of_speech(word_objects) - else: - word_object = word_objects[0] - - definitions_and_synonyms = get_definitions_and_synonyms(word_object) - - if definitions_and_synonyms: - # The german used in Switzerland doesn't use the Eszett. During - # lemmatization double s might be converted to Eszett. In this case the - # lemma should be None. - if word_lemma and word == word_lemma.replace("ß", "ss"): - word_lemma = None - # The return value includes the word used as parameter, the lemma (might be - # different to the param and the defintions with the synonyms for each - # definition. - output = { - "word-param": word, - "word-lemma": word_lemma, - "definitions": definitions_and_synonyms, - } - return json.dumps(output, ensure_ascii=False) - else: - return None - -def process_command(command: str, args: str) -> Optional[str]: - """ - Process a command and its arguments using registered functions. - - Args: - command (str): The command to be executed. - args (str): The comma-separated arguments for the command. - - Returns: - str: The result of the executed command or an "Invalid command" message. - The command returns a string or None. - """ - - functions = { - "woerterbuch_definitions_and_synonyms": woerterbuch_definitions_and_synonyms, - # "get_synonyms": get_synonyms, - # "get_definitions": get_definitions, - } - - if command in functions: - func = functions[command] - parsed_args = [arg.strip() for arg in args.split(",")] - - return func(*parsed_args) - else: - return "Invalid command" - - -while True: - # Wait input from the user (read a string from standard input) - line = input() - - # Split the user input into command and arguments using the comma as a separator. 
- command, args = line.split(",", 1) - - # Call the 'process_command' function with the extracted command and arguments. - result = process_command(command, args) - - # Print the result of the executed command or an error message. - print(result)