From 961cae0753497985a56b411700e456ff2079e0c6 Mon Sep 17 00:00:00 2001
From: Kleber Noel <42589399+klebster2@users.noreply.github.com>
Date: Mon, 11 Nov 2024 15:05:06 +0000
Subject: [PATCH] update

---
 README.md              |   2 +-
 plugin/wordnet-cmp.vim | 153 +++++++------
 python/plugin.py       | 477 ++++++++++++++++-------------------
 requirements.txt       |   2 +-
 4 files changed, 268 insertions(+), 366 deletions(-)

diff --git a/README.md b/README.md
index ab8ff9e..c46527c 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ A Vim/Neovim plugin that provides WordNet-based completions through nvim-cmp, of
 - Python 3.8 or higher
 - nvim-cmp installed and configured
 - Python packages:
-  [`wn`](https://github.com/goodmami/wn)
+  [`wn>=0.10.1`](https://github.com/goodmami/wn)
 
 ## Installation
 
diff --git a/plugin/wordnet-cmp.vim b/plugin/wordnet-cmp.vim
index c2e264d..42a3be2 100644
--- a/plugin/wordnet-cmp.vim
+++ b/plugin/wordnet-cmp.vim
@@ -36,20 +36,26 @@ function! s:wordnetcmp()
   -- Helper functions
   local function format_menu_source(word_class)
-    return string.format("WORDNET [%s]", word_class)
+    return ""
   end
 
-  local function create_rich_documentation(word, word_class, definitions)
+  local function create_rich_documentation(word, word_class, definition, sense_synonyms)
     local doc = {}
     table.insert(doc, string.format("# %s [%s]\n", word, word_class))
+    table.insert(doc, string.format("_%s_\n", definition))
 
-    for i, def in ipairs(definitions) do
-      table.insert(doc, string.format("%d. _%s_", i, def))
+    if sense_synonyms and #sense_synonyms > 0 then
+      table.insert(doc, "\n**Synonyms:**")
+      for _, syn in ipairs(sense_synonyms) do
+        local syn_word = syn[1]
+        table.insert(doc, string.format("- %s", syn_word))
+      end
     end
 
-    return table.concat(doc, "\n\n")
+    return table.concat(doc, "\n")
   end
+
   source.new = function()
     return setmetatable({}, { __index = source })
   end
@@ -66,6 +72,62 @@ function! s:wordnetcmp()
   source.get_keyword_pattern = function()
     return [[\k\+]]
   end
+  -- Richer formatting helpers; these shadow the simpler versions above.
+  local function format_menu_source(word_class)
+    return "[" .. word_class .. "]"
+  end
+
+  local function create_rich_documentation(item)
+    local doc = {}
+    local word = item.word
+    local word_class = item.data.word_class
+    local definition = item.data.definition
+
+    -- Main header
+    table.insert(doc, string.format("# %s [%s]\n", word, word_class))
+
+    -- Main definition
+    table.insert(doc, string.format("_%s_\n", definition))
+
+    -- Handle sense-specific synonyms
+    if item.data.sense_synonyms and #item.data.sense_synonyms > 0 then
+      table.insert(doc, "\n**Synonyms:**")
+      for _, syn in ipairs(item.data.sense_synonyms) do
+        local syn_word, syn_def = syn[1], syn[2]
+        if syn_word ~= word then -- Don't include the word itself as its own synonym
+          table.insert(doc, string.format("- %s: _%s_", syn_word, syn_def))
+        end
+      end
+    end
+
+    -- Handle semantic relations (meronyms, hyponyms, etc.)
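+    -- NOTE: this block assumes item.data carries the fields produced by the
+    -- Python side (type, chain, definition); chain is the two-step path from
+    -- the query word to the related lemma, so #chain > 1 only holds for
+    -- relation items, never for main senses.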
+    if item.data.chain and #item.data.chain > 1 then
+      -- Get relation type and display name
+      local relation_type = item.data.type
+      local relation_display = {
+        hyponym = "Types/Specific Forms",
+        hypernym = "General Categories",
+        meronym = "Parts/Components",
+        troponym = "Ways to",
+        similar = "Similar Terms",
+      }
+
+      local relation_name = relation_display[relation_type] or relation_type:gsub("^%l", string.upper)
+
+      table.insert(doc, string.format("\n**%s:**", relation_name))
+
+      -- Build relation chain display
+      local chain = item.data.chain
+      local chain_str = table.concat(chain, " → ")
+      table.insert(doc, string.format("- Chain: %s", chain_str))
+      if item.data.definition then
+        table.insert(doc, string.format("- Definition: _%s_", item.data.definition))
+      end
+    end
+
+    return table.concat(doc, "\n")
+  end
+
   source.complete = function(self, params, callback)
     local line = vim.fn.getline('.')
     local col = vim.fn.col('.')
@@ -85,87 +147,36 @@ function! s:wordnetcmp()
       return
     end
 
-    -- Group by word and word class
-    local main_senses = {}
-    local related_items = {}
-
-    -- First pass: organize main senses and related terms
+    -- Transform items for completion
+    local result = {}
+
     for _, item in ipairs(items) do
-      if item.data.type == "main" then
-        local key = string.format("%s_%s", item.word, item.data.word_class)
-        if not main_senses[key] then
-          main_senses[key] = {
-            word = item.word,
-            word_class = item.data.word_class,
-            definitions = {},
-            textEdit = item.textEdit
-          }
-        end
-        table.insert(main_senses[key].definitions, item.data.definition)
-      else
-        table.insert(related_items, {
+      -- Keep only items whose word starts with the query word
+      if item.word:find("^" .. query_word) then
+        local completion_item = {
           label = item.word,
           kind = cmp.lsp.CompletionItemKind.Text,
-          detail = format_menu_source(item.data.word_class),
-          menu = table.concat(item.data.chain, " → "),
-          documentation = {
-            kind = 'markdown',
-            value = string.format("# %s\n\n_%s_\n\n**Chain:**\n%s",
-              item.word,
-              item.data.definition,
-              table.concat(item.data.chain, " → ")
-            )
-          },
+          detail = item.menu,
+          documentation = item.documentation,
           filterText = query_word,
-          sortText = string.format("B%s%s",
+          sortText = string.format("%s%s%s",
+            item.data.type == "main" and "A" or
+            item.data.type == "synonym" and "B" or "C",
             item.data.word_class,
-            string.format("%03d", #(item.data.chain or {}))
-          ),
-          textEdit = item.textEdit
-        })
-      end
-    end
-
-    -- Create result list
-    local result = {}
-
-    -- Add main senses (separated by word class)
-    for _, sense in pairs(main_senses) do
-      -- Create numbered definitions list
-      local def_list = {}
-      for i, def in ipairs(sense.definitions) do
-        table.insert(def_list, string.format("%d. _%s_", i, def))
-      end
-
-      table.insert(result, {
-        label = sense.word,
-        kind = cmp.lsp.CompletionItemKind.Class,
-        detail = format_menu_source(sense.word_class),
-        menu = string.format("%d definitions", #sense.definitions),
-        documentation = {
-          kind = 'markdown',
-          value = string.format("# %s [%s]\n\n%s",
-            sense.word,
-            sense.word_class,
-            table.concat(def_list, "\n\n")
+            item.word
           )
-        },
-        filterText = sense.word,
-        sortText = string.format("A%s%s", sense.word, sense.word_class),
-        textEdit = sense.textEdit
-      })
+        }
+        table.insert(result, completion_item)
+      end
     end
 
-    -- Add related terms if we have any
     callback({ items = result, isIncomplete = false })
   end
 
-  -- Register the source
   cmp.register_source('wordnet', source.new())
 LUA_EOF
 endfunction
-
 call s:wordnetcmp()
diff --git a/python/plugin.py b/python/plugin.py
index 98b32ce..090abbb 100644
--- a/python/plugin.py
+++ b/python/plugin.py
@@ -1,15 +1,17 @@
 # Title: wordnet-cmp
 # Description: A plugin to help users Define, Use, and Research words.
-# Last Change: 2nd November 2024
+# Last Change: 11th November 2024
 # Maintainer: klebster2
+from __future__ import annotations
+
 import re
 import subprocess
 import sys
 import typing as t
-from collections import defaultdict
 from dataclasses import dataclass
-from enum import Enum, auto
+from enum import Enum
 from functools import lru_cache
+from typing import Dict, List, Optional, Set, Tuple, TypeVar, Union, cast
 
 # Append to path local wordsense.py
 sys.path.append(".")
@@ -53,48 +55,17 @@ def install(package: str):
 ), f"Failed to find a Wordnet dataset for language {TARGET_LANGUAGE}"
 
 
-@dataclass
-class RelatedTerm:
-    word: str
-    definition: str
-    relation_type: str
-
-
-def install(package: str):
-    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
-
-
-if not any(re.findall(r"pytest|py.test", sys.argv[0])):
-    try:
-        import vim  # pylint: disable=import-error
-    except Exception as e:
-        print("No vim module available outside vim")
-        raise e
-else:
-    vim = None  # type: ignore
-
-try:
-    import wn
-except ImportError:
-    install("wn")
-    import wn
-
-# Setup language configuration
-TARGET_LANGUAGE = "en" if vim is None else vim.eval("g:wn_cmp_language")
-
-# Find appropriate WordNet dataset
-ARTEFACT_NAME: t.Optional[str] = None
-for dataset_name, item in wn.config.index.items():
-    if item.get("language") == TARGET_LANGUAGE:
-        ARTEFACT_NAME = dataset_name + ":" + list(item["versions"].keys())[0]
-        break
+"""
+WordNet completion plugin for Vim/Neovim.
+Provides word completion and documentation based on WordNet semantic relations.
+""" -assert ( - ARTEFACT_NAME is not None -), f"Failed to find a WordNet dataset for language {TARGET_LANGUAGE}" +T = TypeVar("T") class WordClass(Enum): + """Supported word classes in WordNet.""" + NOUN = "n" VERB = "v" ADJECTIVE = "a" @@ -102,299 +73,219 @@ class WordClass(Enum): @classmethod def from_pos(cls, pos: str) -> "WordClass": + """Convert WordNet POS tag to WordClass.""" pos_map = { "n": cls.NOUN, "v": cls.VERB, "a": cls.ADJECTIVE, - "s": cls.ADJECTIVE, + "s": cls.ADJECTIVE, # 's' is also used for adjectives "r": cls.ADVERB, } return pos_map[pos.lower()] - def to_display_name(self) -> str: + @property + def display_name(self) -> str: + """Get display name for the word class.""" return self.name -@dataclass -class WordSense: - """Represents a single sense of a word with its relations.""" +@dataclass(frozen=True) +class CompletionMeta: + """Metadata for a completion item.""" - lemma: str word_class: WordClass definition: str - sense_number: int - synonyms: t.List[t.Tuple[str, str]] # (word, definition) - hyponyms: t.List[t.Tuple[str, str]] # (word, definition) - hypernyms: t.List[t.Tuple[str, str]] # (word, definition) - meronyms: t.List[t.Tuple[str, str]] # (word, definition) - troponyms: t.List[t.Tuple[str, str]] # (word, definition) - similar: t.List[t.Tuple[str, str]] # (word, definition) + relation_type: Optional[str] = None + relation_chain: Optional[Tuple[str, ...]] = None -@dataclass +@dataclass(frozen=True) class CompletionItem: - """A single completion item with all necessary metadata.""" + """A completion item with all necessary metadata.""" word: str - kind: str - menu: str - data: t.Dict[str, t.Any] - - -@dataclass -class RelationChain: - """Represents a chain of semantic relations.""" - - words: t.List[str] # Chain of words from source to target - relation_types: t.List[str] # Chain of relation types - final_definition: str # Definition of the target word - - -@dataclass -class SemanticDocument: - """A document containing all semantic information for a word and its related terms.""" - - primary_word: str - word_class: WordClass - definitions: t.List[t.Tuple[str, str]] # [(word, definition)] - relation_chains: t.Dict[str, t.List[RelationChain]] # target_word -> list of chains - - -import re -import subprocess -import sys -import typing as t -from collections import defaultdict -from dataclasses import dataclass -from enum import Enum, auto -from functools import lru_cache - - -class WordClass(Enum): - NOUN = "n" - VERB = "v" - ADJECTIVE = "a" - ADVERB = "r" - - @classmethod - def from_pos(cls, pos: str) -> "WordClass": - pos_map = { - "n": cls.NOUN, - "v": cls.VERB, - "a": cls.ADJECTIVE, - "s": cls.ADJECTIVE, - "r": cls.ADVERB, + meta: CompletionMeta + documentation: str + + def to_dict(self) -> Dict[str, Union[str, Dict[str, str]]]: + """Convert to dictionary format for Vim completion.""" + kind_suffix = f":{self.meta.relation_type}" if self.meta.relation_type else "" + return { + "word": self.word, + "kind": f"{self.meta.word_class.display_name}{kind_suffix}", + "menu": f"[{self.meta.word_class.display_name}]", + "documentation": {"kind": "markdown", "value": self.documentation}, } - return pos_map[pos.lower()] - - def to_display_name(self) -> str: - return self.name - - -@dataclass -class RelationChain: - """Represents a chain of semantic relations.""" - - words: t.List[str] # Chain of words from source to target - relation_types: t.List[str] # Chain of relation types - final_definition: str # Definition of the target word - - -@dataclass -class SemanticDocument: 
- """A document containing all semantic information for a word and its related terms.""" - - primary_word: str - word_class: WordClass - definitions: t.List[t.Tuple[str, str]] # [(word, definition)] - relation_chains: t.Dict[str, t.List[RelationChain]] # target_word -> list of chains class WordNetCompleter: - def __init__(self, artefact_name: str): - self.wn = wn.Wordnet(artefact_name) - self.MAX_DEPTH = 1 + """Provides word completions using WordNet semantic relations.""" - def _normalize_word(self, word: str) -> str: + def __init__(self, wordnet: wn.Wordnet) -> None: + """Initialize with a WordNet instance.""" + self.wn = wordnet + self._seen_combinations: Set[Tuple[str, str]] = set() + + @staticmethod + @lru_cache(maxsize=1024) + def _normalize_word(word: str) -> str: + """Normalize word for lookup, removing non-ASCII chars and special chars.""" word_lower = word.lower() word_lower_rep1 = re.sub(r"[^\x00-\x7F]+", "", word_lower) - word_lower_rep2 = re.sub(r"\W+", "", word_lower_rep1) - return word_lower_rep2 + return re.sub(r"\W+", "", word_lower_rep1) + + @lru_cache(maxsize=1024) + def _get_synsets(self, word: str, pos: str) -> List[wn.Synset]: + """Get all synsets for a word and POS.""" + return self.wn.synsets(word, pos=pos) - def _explore_synset( + @lru_cache(maxsize=1024) + def _format_documentation( self, - synset: "wn.Synset", + word: str, word_class: WordClass, - current_chain: t.List[str], - current_relations: t.List[str], - depth: int = 0, - seen_synsets: t.Optional[set] = None, - ) -> t.Tuple[t.List[t.Tuple[str, str]], t.Dict[str, t.List[RelationChain]]]: - """ - Recursively explore a synset and its related terms, tracking the chain of relations. - """ - if seen_synsets is None: - seen_synsets = set() - - if depth >= self.MAX_DEPTH or synset.id in seen_synsets: - return [], {} - - seen_synsets.add(synset.id) - - definitions = [(lemma, synset.definition()) for lemma in synset.lemmas()] - relation_chains: t.Dict[str, t.List[RelationChain]] = defaultdict(list) - - def process_related(related_synset: "wn.Synset", relation_type: str): - for lemma in related_synset.lemmas(): - new_chain = current_chain + [lemma] - new_relations = current_relations + [relation_type] - - relation_chains[lemma].append( - RelationChain( - words=new_chain, - relation_types=new_relations, - final_definition=related_synset.definition(), - ) - ) - - if depth < self.MAX_DEPTH - 1: - sub_defs, sub_chains = self._explore_synset( - related_synset, - word_class, - new_chain, - new_relations, - depth + 1, - seen_synsets, - ) - definitions.extend(sub_defs) - for word, chains in sub_chains.items(): - relation_chains[word].extend(chains) - - if word_class == WordClass.NOUN: - for hyponym in synset.hyponyms(): - process_related(hyponym, "hyponym") - for hypernym in synset.hypernyms(): - process_related(hypernym, "hypernym") - for meronym in synset.meronyms(): - process_related(meronym, "meronym") - elif word_class == WordClass.VERB: - for troponym in synset.hyponyms(): - process_related(troponym, "troponym") - elif word_class == WordClass.ADJECTIVE: - for similar in synset.get_related(): - process_related(similar, "similar") - - return definitions, relation_chains - - @lru_cache(maxsize=128) - def build_semantic_document( - self, word: str, word_class: WordClass - ) -> SemanticDocument: - """Build a comprehensive semantic document for a word and its relations.""" - all_definitions = [] - all_chains: t.Dict[str, t.List[RelationChain]] = defaultdict(list) - - for synset in self.wn.synsets(word, pos=word_class.value): 
-            definitions, chains = self._explore_synset(
-                synset,
-                word_class,
-                current_chain=[word],
-                current_relations=[],
-                depth=0,
-                seen_synsets=set(),
+        definition: str,
+        relation_type: Optional[str] = None,
+        relation_chain: Optional[Tuple[str, ...]] = None,
+    ) -> str:
+        """Format completion documentation."""
+        doc_parts = [f"# {word} [{word_class.display_name}]\n", f"{definition}\n"]
+
+        if relation_type and relation_chain:
+            doc_parts.extend(
+                [
+                    f"**{relation_type.replace('_', ' ').title()}** of: {relation_chain[0]}",
+                    f"Chain: {' → '.join(relation_chain)}",
+                ]
             )
-            all_definitions.extend(definitions)
-            for target_word, word_chains in chains.items():
-                all_chains[target_word].extend(word_chains)
-
-        return SemanticDocument(
-            primary_word=word,
-            word_class=word_class,
-            definitions=list(set(all_definitions)),  # Remove duplicates
-            relation_chains=dict(all_chains),
-        )
-
-    def format_completion_items(
-        self, doc: SemanticDocument
-    ) -> t.List[t.Dict[str, t.Any]]:
-        """Format a semantic document into completion items with rich metadata."""
-        items = []
-
-        # Add main word definitions
-        for idx, (word, definition) in enumerate(doc.definitions, 1):
-            if word == doc.primary_word:
-                items.append(
-                    {
-                        "word": word,
-                        "kind": doc.word_class.to_display_name(),
-                        "menu": f"[{doc.word_class.to_display_name()}:{idx}] {definition}",
-                        "data": {
-                            "word_class": doc.word_class.to_display_name(),
-                            "sense_number": idx,
-                            "type": "main",
-                            "chain": [word],
-                            "relations": [],
-                            "definition": definition,
-                        },
-                    }
-                )
-
-        # Add related words with their relation chains
-        relation_markers = {
-            "hyponym": "spec",
-            "hypernym": "gen",
-            "meronym": "part",
-            "troponym": "manner",
-            "similar": "sim",
-        }
-
-        for target_word, chains in doc.relation_chains.items():
-            for chain in chains:
-                # Create relation path string
-                relation_path = " → ".join(
-                    f"{w}({relation_markers.get(r, r)})"
-                    for w, r in zip(chain.words[:-1], chain.relation_types)
-                )
-
-                items.append(
-                    {
-                        "word": target_word,
-                        "kind": f"{doc.word_class.to_display_name()}:{relation_markers.get(chain.relation_types[-1], 'rel')}",
-                        "menu": f"[{doc.word_class.to_display_name()}] {chain.final_definition} via {relation_path}",
-                        "data": {
-                            "word_class": doc.word_class.to_display_name(),
-                            "type": chain.relation_types[-1],
-                            "chain": chain.words,
-                            "relations": chain.relation_types,
-                            "definition": chain.final_definition,
-                        },
-                    }
-                )
-
-        return items
-
-    def get_word_completions(self, word: str) -> t.List[t.Dict[str, t.Any]]:
-        """Get completions for a word with comprehensive semantic information."""
+        return "\n".join(doc_parts)
+
+    def _process_synset(
+        self,
+        synset: wn.Synset,
+        word_class: WordClass,
+        base_word: str,
+        completions: List[CompletionItem],
+    ) -> None:
+        """Process a single synset and add its completions."""
+        # Process direct meanings
+        definition = cast(str, synset.definition())
+        for lemma in synset.lemmas():
+            key = (lemma, word_class.value)
+            if key not in self._seen_combinations:
+                self._seen_combinations.add(key)
+                meta = CompletionMeta(word_class=word_class, definition=definition)
+                doc = self._format_documentation(lemma, word_class, definition)
+                completions.append(CompletionItem(lemma, meta, doc))
+
+        # Process relations
+        for rel_type, related in [
+            ("hypernym", synset.hypernyms()),
+            ("hyponym", synset.hyponyms()),
+            ("meronym", synset.meronyms()),
+            ("holonym", synset.holonyms()),
+        ]:
+            for rel_synset in related:
+                for lemma in rel_synset.lemmas():
+                    key = (lemma, word_class.value)
+                    if key not in self._seen_combinations:
+                        self._seen_combinations.add(key)
+                        rel_def = cast(str, rel_synset.definition())
+                        chain = (base_word, lemma)
+                        meta = CompletionMeta(
+                            word_class=word_class,
+                            definition=rel_def,
+                            relation_type=rel_type,
+                            relation_chain=chain,
+                        )
+                        doc = self._format_documentation(
+                            lemma, word_class, rel_def, rel_type, chain
+                        )
+                        completions.append(CompletionItem(lemma, meta, doc))
+
+    def get_completions(self, word: str) -> List[Dict[str, t.Any]]:
+        """Get completions for a word."""
         if not word or len(word) < 2:
             return []
 
         normalized = self._normalize_word(word)
-        completions = []
+        completions: List[CompletionItem] = []
+        self._seen_combinations.clear()
 
         # Process each word class
         for word_class in WordClass:
-            # Build semantic document for this word class
-            doc = self.build_semantic_document(normalized, word_class)
-            if doc.definitions:  # Only process if we found any meanings
-                # Convert to completion items
-                completions.extend(self.format_completion_items(doc))
+            synsets = self._get_synsets(normalized, word_class.value)
+            for synset in synsets:
+                self._process_synset(synset, word_class, normalized, completions)
+
+        return [item.to_dict() for item in completions]
 
-        return completions
 
+# Define the in-file tests only when actually running under pytest.
+if any(re.findall(r"pytest|py.test", sys.argv[0])):
+    import pytest
+
+    @pytest.fixture
+    def wordnet_mock():
+        """Create a mock WordNet instance for testing."""
+
+        class MockSynset:
+            def definition(self):
+                return "test definition"
+
+            def lemmas(self):
+                return ["test", "example"]
+
+            def hypernyms(self):
+                return []
+
+            def hyponyms(self):
+                return []
+
+            def meronyms(self):
+                return []
+
+            def holonyms(self):
+                return []
+
+        class MockWordNet:
+            def synsets(
+                self, word: str, pos: str
+            ) -> List[MockSynset]:  # pylint: disable=unused-argument
+                return [MockSynset()]
+
+        return MockWordNet()
+
+    def test_normalize_word(wordnet_mock):
+        """Test word normalization."""
+        completer = WordNetCompleter(wordnet_mock)  # type: ignore
+        assert completer._normalize_word("Test-Word") == "testword"
+        assert completer._normalize_word("Testé") == "test"
+        assert completer._normalize_word("test_word") == "testword"
+
+    def test_empty_input(wordnet_mock):
+        """Test handling of empty input."""
+        completer = WordNetCompleter(wordnet_mock)  # type: ignore
+        assert completer.get_completions("") == []
+        assert completer.get_completions("a") == []
+
+    def test_basic_completion(wordnet_mock):
+        """Test basic completion functionality."""
+        completer = WordNetCompleter(wordnet_mock)
+        completions = completer.get_completions("test")
+        assert len(completions) > 0
+        completion = completions[0]
+        assert "word" in completion
+        assert "kind" in completion
+        assert "menu" in completion
+        assert "documentation" in completion
 
 try:
     # Global instance of the completer
-    completer = WordNetCompleter(ARTEFACT_NAME)
+    completer = WordNetCompleter(wn.Wordnet(ARTEFACT_NAME))
 except Exception as e:  # pylint: disable=broad-except
     subprocess.check_call(
@@ -407,9 +298,9 @@ def get_word_completions(self, word: str) -> t.List[t.Dict[str, t.Any]]:
         ]
     )
     # Global instance of the completer
-    completer = WordNetCompleter(ARTEFACT_NAME)
+    completer = WordNetCompleter(wn.Wordnet(ARTEFACT_NAME))
 
 
 def wordnet_complete(base: str) -> t.List[t.Dict[str, t.Any]]:
     """Main completion function to be called from Vim/Lua."""
-    return completer.get_word_completions(base)
+    return completer.get_completions(base)
diff --git a/requirements.txt b/requirements.txt
index 9535b7f..8bd6748 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 pylint
 pytest
-wn
+wn>=0.10.1
 vim-client
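--
Testing sketch (an assumed workflow, not specified by the patch): the
in-file tests are only defined when sys.argv[0] matches pytest, so after
installing the dependencies they can be run directly against the module:

    pip install -r requirements.txt
    pytest python/plugin.py -q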