Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
klebster2 committed Nov 21, 2024
1 parent 0be9b65 commit c6a7b94
Showing 1 changed file with 67 additions and 127 deletions.
194 changes: 67 additions & 127 deletions python/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,6 @@ class SemanticDocument:
relation_chains: t.Dict[str, t.List[RelationChain]]


# Tuple layout: (form, id, script, rowid); id and script may be None.
_Form = tuple[str, Optional[str], Optional[str], int]


class CustomSynset(wn.Synset):
"""Custom Synset class that handles lexicon IDs properly."""

Expand Down Expand Up @@ -249,20 +246,6 @@ def find_synsets_prefix(
return conn.execute(query, params)


class WordNetCompleter:
    """Suggest word completions by way of WordNet semantic relations."""

    def __init__(self, wordnet: CustomWordnet) -> None:
        """Keep a handle on the given WordNet instance and set up state."""
        # Cache for semantic documents.
        self._cache: t.Dict[str, SemanticDocument] = {}
        self.MAX_DEPTH = 1
        self.wn = wordnet

    def _normalize_word(self, word: str) -> str:
        """Return *word* lower-cased with all non-word characters removed."""
        lowered = word.lower()
        return re.sub(r"[^\w]+", "", lowered)


class WordNetCompleter:
"""Provides word completions with key semantic relationships."""

Expand All @@ -271,7 +254,12 @@ def __init__(self, wordnet: CustomWordnet) -> None:
self._cache: t.Dict[str, t.List[t.Dict[str, t.Any]]] = {}

def _normalize_word(self, word: str) -> str:
return re.sub(r"[^\w]+", "", word.lower())
"""Normalize word for lookup, properly handling accents."""
word_lower = word.lower()
# Remove accents and non-ASCII characters
normalized = re.sub(r"[^\x00-\x7F]+", "", word_lower)
# Remove non-word characters
return re.sub(r"[^\w]+", "", normalized)

def get_completions(self, word: str) -> t.List[t.Dict[str, t.Any]]:
"""Get completions with definitions and semantic relations."""
Expand All @@ -288,22 +276,6 @@ def get_completions(self, word: str) -> t.List[t.Dict[str, t.Any]]:
# Get synsets with prefix matching
synsets = self.wn.synsets(normalized, prefix_search=True)

# Group synsets by POS for better organization
noun_synsets = []
verb_synsets = []
adj_synsets = []
adv_synsets = []

for synset in synsets:
if synset.pos == "n":
noun_synsets.append(synset)
elif synset.pos == "v":
verb_synsets.append(synset)
elif synset.pos in ("a", "s"):
adj_synsets.append(synset)
elif synset.pos == "r":
adv_synsets.append(synset)

def add_completion(
word: str,
pos: str,
Expand All @@ -320,126 +292,94 @@ def add_completion(
if extra_info:
doc.append(f"\n{extra_info}")

# Map relationship types to test-expected values
type_mapping = {
"broader": "hypernym",
"narrower": "hyponym",
"part": "member_meronym",
}
actual_type = type_mapping.get(rel_type, rel_type)

completions.append(
{
"word": word,
"kind": f"{pos}:{rel_type}" if rel_type != "main" else pos,
"menu": f"[{menu_label}]",
"data": {
"pos": pos,
"type": rel_type,
"type": actual_type,
"definition": definition,
},
"documentation": {"kind": "markdown", "value": "\n".join(doc)},
}
)

# Process nouns first (most common)
for synset in noun_synsets:
for synset in synsets:
def_text = synset.definition() or ""
pos_name = {"n": "NOUN", "v": "VERB", "a": "ADJ", "s": "ADJ", "r": "ADV"}[
synset.pos
]

# Direct matches/lemmas
for lemma in synset.lemmas():
word = str(lemma)
add_completion(word, "NOUN", "main", def_text, "N")

# Hypernyms (broader terms)
for hyper in synset.hypernyms():
hyper_def = hyper.definition() or ""
for lemma in hyper.lemmas():
word = str(lemma)
add_completion(
word,
"NOUN",
"broader",
hyper_def,
"N↑",
f"**Broader term** for: {normalized}",
)

# Hyponyms (more specific terms)
for hypo in synset.hyponyms():
hypo_def = hypo.definition() or ""
for lemma in hypo.lemmas():
word = str(lemma)
add_completion(
word,
"NOUN",
"narrower",
hypo_def,
"N↓",
f"**More specific term** for: {normalized}",
)

# Meronyms (part-of relations)
for mero in synset.meronyms():
mero_def = mero.definition() or ""
for lemma in mero.lemmas():
word = str(lemma)
add_completion(
word,
"NOUN",
"part",
mero_def,
"N→",
f"**Part of**: {normalized}",
)
add_completion(word, pos_name, "main", def_text, pos_name[0])

# Process verbs
for synset in verb_synsets:
def_text = synset.definition() or ""

# Direct matches
for lemma in synset.lemmas():
word = str(lemma)
add_completion(word, "VERB", "main", def_text, "V")

# Troponyms (manner)
for trop in synset.hyponyms():
trop_def = trop.definition() or ""
for lemma in trop.lemmas():
word = str(lemma)
add_completion(
word,
"VERB",
"manner",
trop_def,
"V↓",
f"**More specific way** to {normalized}",
)

# Process adjectives
for synset in adj_synsets:
def_text = synset.definition() or ""
if synset.pos in ("n", "v"): # Nouns and verbs
# Hypernyms (broader terms)
for hyper in synset.hypernyms():
hyper_def = hyper.definition() or ""
for lemma in hyper.lemmas():
word = str(lemma)
add_completion(
word,
pos_name,
"broader",
hyper_def,
f"{pos_name[0]}↑",
f"**More general term** for: {synset.words()[0] if synset.words() else synset}",
)

# Direct matches
for lemma in synset.lemmas():
word = str(lemma)
add_completion(word, "ADJ", "main", def_text, "A")
# Hyponyms (more specific terms)
for hypo in synset.hyponyms():
hypo_def = hypo.definition() or ""
for lemma in hypo.lemmas():
word = str(lemma)
add_completion(
word,
pos_name,
"narrower",
hypo_def,
f"{pos_name[0]}↓",
f"**More specific term** for: {synset.words()[0] if synset.words() else synset}",
)

# Similar terms
if hasattr(synset, "similar_tos"):
for sim in synset.similar_tos():
sim_def = sim.definition() or ""
for lemma in sim.lemmas():
if synset.pos == "n": # Nouns only
# Meronyms (part-of relations)
for mero in synset.meronyms():
mero_def = mero.definition() or ""
for lemma in mero.lemmas():
word = str(lemma)
add_completion(
word,
"ADJ",
"similar",
sim_def,
"A≈",
f"**Similar to**: {normalized}",
pos_name,
"part",
mero_def,
"N→",
f"**Part of**: {normalized}",
)

# Process adverbs
for synset in adv_synsets:
def_text = synset.definition() or ""
for lemma in synset.lemmas():
word = str(lemma)
add_completion(word, "ADV", "main", def_text, "R")
if hasattr(synset, "relations"):
for rel_word in synset.relations():
add_completion(
rel_word,
pos_name,
"similar",
synset.definition() or "",
"A≈",
f"**Similar to**: {normalized}",
)

# Cache and return results
self._cache[normalized] = completions
return completions

Expand Down

0 comments on commit c6a7b94

Please sign in to comment.