-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit cc62e56
Showing
10 changed files
with
715 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#+STARTUP: showall | ||
#+TITLE: Ci Dict | ||
#+OPTIONS: num:nil ^:{} toc:nil | ||
|
||
alfred workflow词典,针对英汉查询做了些优化 | ||
|
||
- 即时答案用本地词典,速度快 | ||
- Shift预览有道词典,信息全 | ||
- 回车打开有道网页(或本地词典),随心配 | ||
|
||
* 参考 | ||
- 本地查词基于[[https://github.com/tonyseek/macdict][macdict]] | ||
- 单词发音取自[[https://github.com/wensonsmith/YoudaoTranslator][YoudaoTranslator]] | ||
- 相似单词建议基于[[https://github.com/ahupp/bktree/blob/master/bktree.py][bktree]] |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
""" | ||
This module implements Burkhard-Keller Trees (bk-tree). bk-trees | ||
allow fast lookup of words that lie within a specified distance of a | ||
query word. For example, this might be used by a spell checker to | ||
find near matches to a misspelled word. | ||
The implementation is based on the description in this article: | ||
http://blog.notdot.net/2007/4/Damn-Cool-Algorithms-Part-1-BK-Trees | ||
Licensed under the PSF license: http://www.python.org/psf/license/ | ||
- Adam Hupp <[email protected]> | ||
""" | ||
|
||
# http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Python | ||
def levenshtein(s, t):
    """Return the Levenshtein (edit) distance between ``s`` and ``t``.

    The distance is the minimum number of single-item insertions,
    deletions and substitutions needed to turn ``s`` into ``t``.

    Arguments:
      s, t: sequences supporting ``len()`` and integer indexing
        (typically strings).

    Return value is a non-negative integer.
    """
    m, n = len(s), len(t)
    # Only the previous row of the dynamic-programming table is needed to
    # compute the next one, so keep two rows instead of the full matrix.
    # prev[j] is the distance between the first i items of s and the first
    # j items of t.
    prev = list(range(n + 1))
    for i in range(m):
        curr = [i + 1]
        for j in range(n):
            cost = 0 if s[i] == t[j] else 1
            curr.append(min(prev[j + 1] + 1,   # deletion
                            curr[j] + 1,       # insertion
                            prev[j] + cost))   # substitution
        prev = curr
    return prev[n]
|
||
|
||
class BKTree:
    """A Burkhard-Keller tree over a collection of words.

    Every node is a ``(word, children)`` tuple, where ``children`` is a dict
    mapping a distance ``d`` to the child node whose word lies at distance
    ``d`` from this node's word.  ``self.tree`` holds the root node, or
    ``None`` when the tree was built from an empty iterable.
    """

    def __init__(self, words, distfn=levenshtein):
        """
        Create a new BK-tree from the given distance function and
        words.

        Arguments:
          words: an iterable (a list, a generator, any iterator).  Produces
            values that can be passed to distfn.  The first value becomes
            the root; an empty iterable yields an empty tree.
          distfn: a binary function that returns the distance between
            two words.  Return value is a non-negative integer.  The
            distance function must be a metric.
        """
        self.distfn = distfn

        # iter() lets callers pass plain iterables such as lists, not only
        # iterators; next() on a list would raise TypeError.
        words = iter(words)
        try:
            root = next(words)
        except StopIteration:
            self.tree = None
            return
        self.tree = (root, {})

        for word in words:
            self._add_word(self.tree, word)

    def _add_word(self, parent, word):
        """Insert `word` into the subtree rooted at the node `parent`."""
        node = parent
        # Walk down iteratively so that a deep tree cannot hit the
        # interpreter's recursion limit.
        while True:
            pword, children = node
            d = self.distfn(word, pword)
            child = children.get(d)
            if child is None:
                children[d] = (word, {})
                return
            node = child

    def query(self, word, n):
        """
        Return all words in the tree that are within a distance of `n`
        from `word`.

        Arguments:
          word: a word to query on
          n: a non-negative integer that specifies the allowed distance
            from the query word.

        Return value is a list of tuples (distance, word), sorted in
        ascending order of distance.  An empty tree yields an empty list.
        """
        if self.tree is None:
            return []

        results = []
        pending = [self.tree]
        while pending:
            pword, children = pending.pop()
            d = self.distfn(word, pword)
            if d <= n:
                results.append((d, pword))
            # By the triangle inequality only children stored at a distance
            # in [d - n, d + n] can contain matches.  Keys are never
            # negative, so the lower bound is clamped at 0.
            for i in range(max(0, d - n), d + n + 1):
                child = children.get(i)
                if child is not None:
                    pending.append(child)

        # sort by distance (ties are ordered by word)
        return sorted(results)
|
||
|
||
def brute_query(word, words, distfn, n):
    """Linear-scan distance query, useful as a reference for BK-tree results.

    Arguments:
      word: the word to query for
      words: an iterable that produces the candidate words to test
      distfn: a binary function that returns the distance between an
        item of `words` and `word`
      n: the largest distance at which a candidate still counts as a match

    Return value is a list of the matching candidates, in the order in
    which `words` produced them.
    """
    matches = []
    for candidate in words:
        if distfn(candidate, word) <= n:
            matches.append(candidate)
    return matches
|
||
|
||
def maxdepth(tree, count=0):
    """Return the depth of the deepest node below (and including) `tree`.

    Arguments:
      tree: a node in the form ``(word, children)``, where ``children`` is a
        dict whose values are nodes of the same form
      count: the depth assigned to `tree` itself; a lone root has depth 0

    Return value is an integer.
    """
    _, children = tree
    if children:
        return max(maxdepth(child, count + 1) for child in children.values())
    return count
|
||
|
||
def dict_words(dictfile="/usr/share/dict/american-english"):
    """Return an iterator that produces words in the given dictionary.

    Each line of `dictfile` is stripped of surrounding whitespace; lines
    that are empty after stripping are skipped.  The file is opened
    immediately (so a missing file is reported at call time) and closed as
    soon as the iterator is exhausted or discarded.
    """
    f = open(dictfile)

    def _words():
        # The with-block guarantees the handle is released once iteration
        # finishes, instead of relying on garbage collection.
        with f:
            for line in f:
                word = line.strip()
                if word:
                    yield word

    return _words()
|
||
|
||
def timeof(fn, *args):
    """Call ``fn(*args)``, print how long the call took, and return its result.

    The elapsed time in seconds is printed to standard output, prefixed
    with "time: ".
    """
    import time
    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would distort the interval.
    start = time.perf_counter()
    res = fn(*args)
    print("time: ", (time.perf_counter() - start))
    return res
|
||
|
||
|
||
if __name__ == "__main__":
    # Manual smoke test: build a tree from the words listed in the local
    # file 'big.txt' and print everything within distance 2 of "abc".
    word_source = dict_words('big.txt')
    tree = BKTree(word_source)
    matches = tree.query("abc", 2)
    print(matches)

    # Disabled experiment kept for reference (written in Python 2 syntax):
    # dist = 1
    # for i in ["book", "cat", "backlash", "scandal"]:
    #     w = set(tree.query(i, dist)) - set([i])
    #     print "words within %d of %s: %r" % (dist, i, w)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import sys | ||
import os | ||
import json | ||
import subprocess | ||
import pickle | ||
import string | ||
import re | ||
from typing import List, Tuple | ||
from pathlib import Path | ||
from bktree import BKTree, dict_words | ||
import macdict | ||
|
||
|
||
def parse_Oxford_Chinese_Dictionary(content) -> List[Tuple[str, str]]:
    """Parse a plain-text dictionary page into structured data.

    Dictionary libraries differ in format; this function only understands
    the layout of the Oxford Chinese Dictionary (English-Chinese /
    Chinese-English).

    A sense is recognised as: an optional leading ``;``, an optional
    ``(synonym)`` group, an optional ``«...»`` group, a run of Chinese
    characters, and the pinyin that follows it.  A sense starting with
    ``;`` is appended to the sense before it.

    Returns a list of ``(title, text)`` tuples.  ``text`` is the matched
    fragment with the pinyin removed and runs of spaces collapsed;
    ``title`` is the Chinese runs of ``text`` joined with commas.
    """
    pinyin = r"([a-z]*[āɑ̄ēīōūǖáɑ́éíóúǘǎɑ̌ěǐǒǔǚàɑ̀èìòùǜü]+[a-z]*)+"
    # (synonym) meaning pīnyīn
    pattern = re.compile(r";? ?(\([a-zA-Z, ]+\))? ?(«[a-zA-Z, ]+»)? [\u4e00-\u9fff…]+ " + pinyin)

    # Group the matched fragments: a fragment introduced by ';' continues
    # the previous line, anything else starts a new one.
    lines = []
    for match in pattern.finditer(content):
        fragment = match.group(0)
        if lines and fragment.startswith(';'):
            lines[-1] += fragment
        else:
            lines.append(fragment)

    def to_pair(raw):
        without_pinyin = re.sub(pinyin, "", raw)
        text = re.sub(" +", " ", without_pinyin)
        chinese_runs = re.findall(r"[\u4e00-\u9fff…]+", text)
        return ','.join(chinese_runs), text

    return [to_pair(raw) for raw in lines]
|
||
|
||
def alfred_item(title, subtitle, arg=None, is_suggestion=False):
    """Build one result item for an Alfred Script Filter.

    Format reference:
    https://www.alfredapp.com/help/workflows/inputs/script-filter/json/

    Arguments:
      title: text shown as the item's title; also the text copied from it
      subtitle: text shown under the title; when empty, a fallback hint
        telling the user to search online is shown instead
      arg: value passed on when the item is actioned; defaults to `title`
      is_suggestion: selects the starred icon instead of the regular one

    Return value is a dict ready to be serialised to JSON.
    """
    # Imported locally so this function does not depend on a file-level
    # import being added elsewhere.
    from urllib.parse import quote

    arg = arg or title
    icon_path = "assets/translate-star.png" if is_suggestion else "assets/translate.png"
    return {
        "arg": arg,
        "title": title,
        "subtitle": subtitle or "👻本地查不到,按shift或enter网络查询",
        "valid": True,
        # Percent-encode the word: spaces, '&', '#' or non-ASCII characters
        # would otherwise produce a malformed or truncated query string.
        "quicklookurl": f"https://youdao.com/result?word={quote(arg, safe='')}&lang=en",
        "icon": {"path": icon_path},
        # NOTE(review): the cmd/alt modifiers presumably map to the two
        # pronunciation actions of the workflow — confirm in the workflow.
        "mods": {
            "cmd": {"subtitle": "🔊 ", "arg": arg, "valid": True},
            "alt": {"subtitle": "📣 ", "arg": arg, "valid": True},
        },
        "text": {
            "copy": title,
        },
    }
|
||
|
||
class Suggester:
    """Suggest similarly spelled words using one pickled BK-tree per initial.

    On first use, a BK-tree is built for every letter a-z from the words in
    /usr/share/dict/words and pickled to ``<cache_dir>/<letter>.pkl``.
    Later instances reuse those files.
    """

    def __init__(self, cache_dir=None):
        """Prepare the cache directory, building the trees if necessary.

        Arguments:
          cache_dir: directory holding the pickled trees.  Defaults to the
            ``alfred_workflow_data`` environment variable, or
            ``./dict_cache`` when that is unset.
        """
        cache_dir = cache_dir or os.getenv("alfred_workflow_data", "./dict_cache")
        self.cache_dir = Path(cache_dir)
        # 'z.pkl' is the last file written below, so its presence is used as
        # the marker for an already populated cache.
        if (self.cache_dir / 'z.pkl').exists():
            return
        self.cache_dir.mkdir(exist_ok=True, parents=True)
        atoz = string.ascii_lowercase
        trees = self._load_bktrees(atoz)
        for ch, tree in zip(atoz, trees):
            with open(self.cache_dir / f"{ch}.pkl", "wb") as f:
                pickle.dump(tree, f)

    @staticmethod
    def _load_bktrees(initials) -> List[BKTree]:
        """Build one BK-tree per character of `initials`, in the same order.

        A word belongs to the tree of its first character, compared
        case-insensitively.
        """
        # Read the dictionary once and bucket the words, instead of
        # re-reading the whole file for every initial.
        buckets = {ch.lower(): [] for ch in initials}
        for w in dict_words("/usr/share/dict/words"):
            bucket = buckets.get(w[0].lower())
            if bucket is not None:
                bucket.append(w)
        # BKTree consumes its words with next(), so hand it an iterator.
        return [BKTree(iter(buckets[ch.lower()])) for ch in initials]

    def suggest(self, word: str, max_count: int = 10) -> List[str]:
        """Return up to `max_count` words within edit distance 2 of `word`.

        Words are ordered by ascending distance; `word` itself is never
        included.  Returns an empty list for words shorter than two
        characters or whose first character is not an ASCII letter.
        """
        if len(word) < 2:
            return []
        initial = word[0].lower()
        if initial not in string.ascii_lowercase:
            return []
        # Cache files are named after the lower-case initial; using the raw
        # first character would look for e.g. 'A.pkl', which only resolves
        # on case-insensitive file systems.
        cache_file = self.cache_dir / f"{initial}.pkl"
        # NOTE: pickle executes arbitrary code on load.  This is only safe
        # because the cache directory is written by this class; never point
        # cache_dir at untrusted data.
        with open(cache_file, "rb") as f:
            tree = pickle.load(f)
        results = tree.query(word, 2)
        # Drop the query word before truncating, so that an exact match does
        # not use up one of the max_count slots.
        return [s for _, s in results if s != word][:max_count]
|
||
|
||
def lookup(word: str) -> str:
    """Look `word` up via macdict and return the text after the first '|'.

    Returns an empty string when macdict gives no result or when the result
    contains no '|'.
    NOTE(review): the part before the first '|' is presumably the headword
    with its phonetics — confirm against macdict's output.
    """
    raw = macdict.lookup_word(word)
    if not raw:
        return ''
    _head, _sep, remainder = raw.partition('|')
    return remainder
|
||
|
||
def lookup_parsed(word) -> List[Tuple[str, str]]:
    """Look `word` up and return its senses as ``(title, text)`` tuples.

    When the page cannot be parsed into any sense, a single tuple of the
    word itself and the raw page text is returned instead, so the result is
    never empty.
    """
    page = lookup(word)
    return parse_Oxford_Chinese_Dictionary(page) or [(word, page)]
|
||
|
||
def lookup_render(word) -> str:
    """Return the titles of all senses of `word`, joined with ';'."""
    titles = [title for title, _text in lookup_parsed(word)]
    return ';'.join(titles)
|
||
|
||
def main():
    """Entry point: print Alfred Script Filter JSON for the queried word.

    The word is read from the first command-line argument.  Up to five
    parsed senses of the word are emitted as items.  When the environment
    variable ``max_suggestions`` holds a positive integer, up to that many
    similarly spelled words that have a local definition are appended as
    suggestion items.
    """
    try:
        word = sys.argv[1]
    except IndexError:
        print('You did not enter any terms to look up in the Dictionary.')
        sys.exit()
    entries = lookup_parsed(word)
    items = [alfred_item(w, m, word) for w, m in entries[:5]] or [alfred_item(word, '')]
    # int() is the authority on what parses as a number: str.isdigit() also
    # accepts characters such as '²', which int() rejects with ValueError.
    try:
        max_suggestions = int(os.getenv('max_suggestions', '0'))
    except ValueError:
        max_suggestions = 0
    if max_suggestions > 0:
        # Ask for one more than needed: suggest() may spend one of its slots
        # on the query word itself.  Passing the limit explicitly also keeps
        # suggest()'s default of 10 from capping larger settings.
        words = Suggester().suggest(word, max_suggestions + 1)[:max_suggestions]
        meanings = [lookup_render(w) for w in words]
        # Suggestions without a local definition are left out.
        items += [alfred_item(w, m, is_suggestion=True) for w, m in zip(words, meanings) if m]
    print(json.dumps({"items": items}, ensure_ascii=False))


if __name__ == '__main__':
    main()
Oops, something went wrong.