Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
etern committed Dec 17, 2023
0 parents commit cc62e56
Show file tree
Hide file tree
Showing 10 changed files with 715 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.DS_Store
14 changes: 14 additions & 0 deletions README.org
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#+STARTUP: showall
#+TITLE: Ci Dict
#+OPTIONS: num:nil ^:{} toc:nil

alfred workflow词典,针对英汉查询做了些优化

- 即时答案用本地词典,速度快
- Shift预览有道词典,信息全
- 回车打开有道网页(或本地词典),随心配

* 参考
- 本地查词基于[[https://github.com/tonyseek/macdict][macdict]]
- 单词发音取自[[https://github.com/wensonsmith/YoudaoTranslator][YoudaoTranslator]]
- 相似单词建议基于[[https://github.com/ahupp/bktree/blob/master/bktree.py][bktree]]
Binary file added assets/translate-star.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/translate.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
151 changes: 151 additions & 0 deletions bktree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""
This module implements Burkhard-Keller Trees (bk-tree). bk-trees
allow fast lookup of words that lie within a specified distance of a
query word. For example, this might be used by a spell checker to
find near matches to a misspelled word.
The implementation is based on the description in this article:
http://blog.notdot.net/2007/4/Damn-Cool-Algorithms-Part-1-BK-Trees
Licensed under the PSF license: http://www.python.org/psf/license/
- Adam Hupp <[email protected]>
"""

# http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Python
def levenshtein(s, t):
    """Return the Levenshtein (edit) distance between `s` and `t`.

    The distance is the minimum number of single-item insertions,
    deletions and substitutions needed to turn `s` into `t`.
    Arguments:
      s, t: sequences supporting `len()` and iteration whose items can
        be compared with `==` (typically strings).
    """
    rows, cols = len(s), len(t)
    # table[i][j] holds the distance between s[:i] and t[:j]; row 0 is the
    # cost of building t[:j] from the empty prefix, i.e. j insertions.
    table = [list(range(cols + 1))]
    for i, s_item in enumerate(s, start=1):
        above = table[-1]
        current = [i]  # column 0: delete all i items of s[:i]
        for j, t_item in enumerate(t, start=1):
            deletion = above[j] + 1
            insertion = current[j - 1] + 1
            substitution = above[j - 1] + (0 if s_item == t_item else 1)
            current.append(min(deletion, insertion, substitution))
        table.append(current)
    return table[rows][cols]


class BKTree:
    """A Burkhard-Keller tree for nearest-match lookups in a metric space.

    Each node is a tuple `(word, children)` where `children` maps a
    distance `d` to the sub-tree of words lying exactly `d` away from
    the node's word.  `self.tree` is the root node, or None when the
    tree was built from no words at all.
    """

    def __init__(self, words, distfn=levenshtein):
        """
        Create a new BK-tree from the given distance function and
        words.
        Arguments:
        distfn: a binary function that returns the distance between
        two words. Return value is a non-negative integer. the
        distance function must be a metric space.
        words: an iterable. produces values that can be passed to
        distfn. Lists, generators and other iterators are all
        accepted; an empty iterable yields an empty tree.
        """
        self.distfn = distfn

        # iter() lets callers pass any iterable, not only an iterator.
        word_iter = iter(words)
        try:
            root = next(word_iter)
        except StopIteration:
            # Do not leak StopIteration out of a constructor; an empty
            # tree simply answers every query with no results.
            self.tree = None
            return
        self.tree = (root, {})

        for word in word_iter:
            self._add_word(self.tree, word)

    def _add_word(self, parent, word):
        """Insert `word` into the sub-tree rooted at the node `parent`.

        Walks down the edges labelled with the distance between `word`
        and each visited node until a free edge is found.  Written as a
        loop so that deep trees cannot hit the recursion limit.
        """
        node = parent
        while True:
            node_word, children = node
            d = self.distfn(word, node_word)
            child = children.get(d)
            if child is None:
                children[d] = (word, {})
                return
            node = child

    def query(self, word, n):
        """
        Return all words in the tree that are within a distance of `n'
        from `word`.
        Arguments:
        word: a word to query on
        n: a non-negative integer that specifies the allowed distance
        from the query word.
        Return value is a list of tuples (distance, word), sorted in
        ascending order of distance.
        """
        if self.tree is None:
            return []

        results = []
        pending = [self.tree]
        while pending:
            node_word, children = pending.pop()
            d = self.distfn(word, node_word)
            if d <= n:
                results.append((d, node_word))
            # By the triangle inequality only children whose edge label
            # lies in [d - n, d + n] can contain matches.
            for edge in range(d - n, d + n + 1):
                child = children.get(edge)
                if child is not None:
                    pending.append(child)

        # sort by distance
        return sorted(results)


def brute_query(word, words, distfn, n):
    """Linear-scan distance query, useful as a reference for BKTree.query.

    Arguments:
      word: the word to query for
      words: an iterable that produces the candidate words to test
      distfn: a binary function returning the distance between a
        candidate from `words` and `word`
      n: the largest distance at which a candidate still matches
    Returns the matching candidates as a list, in iteration order.
    """
    matches = []
    for candidate in words:
        if distfn(candidate, word) <= n:
            matches.append(candidate)
    return matches


def maxdepth(tree, count=0):
    """Return the depth of the deepest node below `tree`.

    Arguments:
      tree: a BK-tree node, i.e. a tuple `(word, children)` where
        `children` maps distances to child nodes.
      count: the depth of `tree` itself; callers normally leave this at
        its default of 0 and it is incremented on each recursive step.
    A node without children has depth `count`.
    """
    _, children = tree
    if not children:
        return count
    return max(maxdepth(child, count + 1) for child in children.values())


def dict_words(dictfile="/usr/share/dict/american-english"):
    """Return an iterator that produces words in the given dictionary.

    Each line of `dictfile` is stripped of surrounding whitespace and
    blank lines are skipped.  The file is opened immediately, so a
    missing file raises here rather than on first iteration, and it is
    closed once the iterator is exhausted or discarded.
    """
    handle = open(dictfile)

    def _words():
        # The with-block guarantees the handle is released even when the
        # consumer stops iterating early and the generator is closed.
        with handle:
            for line in handle:
                word = line.strip()
                if word:
                    yield word

    return _words()


def timeof(fn, *args):
    """Call `fn(*args)`, print how long the call took, and return its result.

    The elapsed time is printed in seconds.  It is measured with
    `time.perf_counter()`, a monotonic high-resolution clock, so the
    reading is not affected by system clock adjustments.
    """
    import time
    started = time.perf_counter()
    res = fn(*args)
    print("time: ", (time.perf_counter() - started))
    return res



if __name__ == "__main__":
    # Smoke test: index every word of big.txt, then print the entries
    # that lie within edit distance 2 of "abc".
    demo_tree = BKTree(dict_words("big.txt"))
    near_matches = demo_tree.query("abc", 2)
    print(near_matches)

    # A second experiment used to live here (disabled): for each of
    # "book", "cat", "backlash" and "scandal", print the set of words
    # within distance 1, excluding the word itself.

133 changes: 133 additions & 0 deletions dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/usr/bin/env python3

import sys
import os
import json
import subprocess
import pickle
import string
import re
from typing import List, Tuple
from pathlib import Path
from bktree import BKTree, dict_words
import macdict


def parse_Oxford_Chinese_Dictionary(content) -> List[Tuple[str, str]]:
    """Parse plain-text dictionary content into structured data.

    Dictionary sources differ in format; this function only understands
    the layout of the Oxford English-Chinese / Chinese-English dictionary.

    Returns a list of `(title, text)` tuples, one per sense: `text` is
    the matched sense with its pinyin removed and runs of spaces
    collapsed, and `title` is the comma-joined Chinese words found in it.
    Fragments starting with ';' are appended to the preceding sense.
    """
    pinyin = r"([a-z]*[āɑ̄ēīōūǖáɑ́éíóúǘǎɑ̌ěǐǒǔǚàɑ̀èìòùǜü]+[a-z]*)+"
    # (synonym) meaning pīnyīn
    sense = re.compile(r";? ?(\([a-zA-Z, ]+\))? ?(«[a-zA-Z, ]+»)? [\u4e00-\u9fff…]+ " + pinyin)
    fragments = [match.group(0) for match in sense.finditer(content)]

    # Glue ';'-prefixed fragments onto the sense they continue.
    merged = []
    for fragment in fragments:
        continues_previous = fragment.startswith(';') and merged
        if continues_previous:
            merged[-1] += fragment
        else:
            merged.append(fragment)

    parsed = []
    for line in merged:
        without_pinyin = re.sub(pinyin, "", line)
        squeezed = re.sub(" +", " ", without_pinyin)
        chinese_words = re.findall(r"[\u4e00-\u9fff…]+", squeezed)
        parsed.append((','.join(chinese_words), squeezed))
    return parsed


def alfred_item(title, subtitle, arg=None, is_suggestion=False):
    """Build one result item for an Alfred Script Filter.

    See https://www.alfredapp.com/help/workflows/inputs/script-filter/json/

    Arguments:
      title: the main text of the row; also what is copied to the clipboard.
      subtitle: secondary text; a "not found locally" hint is shown when
        it is empty.
      arg: the value handed to the next workflow step and looked up by
        Quick Look; defaults to `title`.
      is_suggestion: pick the starred icon used for spelling suggestions.
    Returns the item as a dict ready to be serialized to JSON.
    """
    from urllib.parse import quote

    arg = arg or title
    # Percent-encode the word so that spaces, '&', '#', etc. cannot break
    # or truncate the query string of the Quick Look URL.
    encoded_word = quote(str(arg), safe="")
    icon_path = "assets/translate-star.png" if is_suggestion else "assets/translate.png"
    item = {
        "arg": arg,
        "title": title,
        "subtitle": subtitle or "👻本地查不到,按shift或enter网络查询",
        "valid": True,
        "quicklookurl": f"https://youdao.com/result?word={encoded_word}&lang=en",
        "icon": {"path": icon_path},
        "mods": {
            "cmd": {"subtitle": "🔊 ", "arg": arg, "valid": True},
            "alt": {"subtitle": "📣 ", "arg": arg, "valid": True},
        },
        "text": {
            "copy": title,
        },
    }
    return item


class Suggester:
    """Suggest similarly spelled words using per-letter BK-trees cached on disk.

    One pickled tree per initial letter (`a.pkl` ... `z.pkl`) is kept in
    `cache_dir`, so a lookup only has to load the tree for the query's
    first letter.
    """

    def __init__(self, cache_dir=None):
        """Ensure the per-letter tree cache exists, building it if needed.

        Arguments:
          cache_dir: directory holding the cache; defaults to the
            `alfred_workflow_data` environment variable, or
            "./dict_cache" when that is unset.
        The cache is considered complete when `z.pkl` exists, because
        the files are written in alphabetical order.
        """
        cache_dir = cache_dir or os.getenv("alfred_workflow_data", "./dict_cache")
        self.cache_dir = Path(cache_dir)
        if self.cache_dir.exists() and (self.cache_dir / 'z.pkl').exists():
            return
        self.cache_dir.mkdir(exist_ok=True, parents=True)
        atoz = string.ascii_lowercase
        trees = self._load_bktrees(atoz)
        for ch, tree in zip(atoz, trees):
            with open(self.cache_dir / f"{ch}.pkl", "wb") as f:
                pickle.dump(tree, f)

    @staticmethod
    def _load_bktrees(initials) -> "List[BKTree]":
        """Build one BK-tree per initial letter from the system word list.

        The word list is read once and split into buckets by
        (case-insensitive) first letter; the returned trees are in the
        same order as `initials`.
        """
        initials = list(initials)
        buckets = {ch.lower(): [] for ch in initials}
        for w in dict_words("/usr/share/dict/words"):
            bucket = buckets.get(w[0].lower())
            if bucket is not None:
                bucket.append(w)
        # Hand BKTree an iterator: its constructor consumes words with next().
        return [BKTree(iter(buckets[ch.lower()])) for ch in initials]

    def suggest(self, word: str, max_count: int = 10) -> List[str]:
        """Return up to `max_count` words within edit distance 2 of `word`.

        Suggestions are ordered by increasing distance and never include
        `word` itself.  An empty list is returned for words shorter than
        two characters, for words that do not start with an ASCII letter,
        and when no cache file exists for the initial letter.
        """
        if len(word) < 2:
            return []
        initial = word[0].lower()
        if len(initial) != 1 or initial not in string.ascii_lowercase:
            return []
        # Cache files are named after the lowercase letter, so the initial
        # must be lowercased or capitalized queries would miss the cache.
        cache_file = self.cache_dir / f"{initial}.pkl"
        try:
            with open(cache_file, "rb") as f:
                # NOTE(security): pickle executes code on load; this is only
                # acceptable because the cache directory is written by this
                # workflow itself. Never point cache_dir at untrusted files.
                tree = pickle.load(f)
        except FileNotFoundError:
            return []
        # Drop the query word before truncating so that an exact match does
        # not use up one of the `max_count` slots.
        candidates = [s for _, s in tree.query(word, 2) if s != word]
        return candidates[:max_count]


def lookup(word: str) -> str:
    """Look `word` up in the local macOS dictionary via `macdict`.

    Returns everything after the first '|' of the raw entry, or an empty
    string when nothing is found or the entry contains no '|'.
    """
    raw = macdict.lookup_word(word)
    if not raw:
        return ''
    _headword, _separator, body = raw.partition('|')
    return body


def lookup_parsed(word) -> List[Tuple[str, str]]:
    """Look `word` up and return its senses as `(title, text)` tuples.

    When the entry cannot be parsed, a single tuple pairing `word` with
    the unparsed entry text is returned instead, so the result is never
    empty.
    """
    page = lookup(word)
    return parse_Oxford_Chinese_Dictionary(page) or [(word, page)]


def lookup_render(word) -> str:
    """Return the titles of all senses of `word`, joined with ';'."""
    titles = [title for title, _text in lookup_parsed(word)]
    return ';'.join(titles)


def main():
    """Entry point of the Alfred Script Filter.

    Reads the query word from the first command-line argument and prints
    a Script Filter JSON document to stdout: up to five senses from the
    local dictionary, followed by spelling suggestions when the
    `max_suggestions` environment variable holds a positive integer.
    """
    try:
        word = sys.argv[1]
    except IndexError:
        print('You did not enter any terms to look up in the Dictionary.')
        sys.exit()
    entries = lookup_parsed(word)
    items = [alfred_item(w, m, word) for w, m in entries[:5]] or [alfred_item(word, '')]
    # Anything that is not a plain non-negative integer disables suggestions.
    max_suggestions = os.getenv('max_suggestions', '0')
    max_suggestions = int(max_suggestions) if max_suggestions.isdigit() else 0
    if max_suggestions > 0:
        # Pass the limit through: relying on suggest()'s default max_count
        # would silently cap the configured value at 10.
        words = Suggester().suggest(word, max_count=max_suggestions)[:max_suggestions]
        meanings = [lookup_render(w) for w in words]
        # Suggestions without a local definition are not worth showing.
        items += [alfred_item(w, m, is_suggestion=True) for w, m in zip(words, meanings) if m]
    print(json.dumps({"items": items}, ensure_ascii=False))


# Run the Script Filter only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Binary file added icon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit cc62e56

Please sign in to comment.