Skip to content

Commit

Permalink
cli: + kanji; fix slow start
Browse files Browse the repository at this point in the history
  • Loading branch information
obfusk committed Jun 15, 2020
1 parent 249582b commit cd72526
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 18 deletions.
7 changes: 4 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ SHELL = /bin/bash
.PHONY: test

test:
export PYTHONPATH=$$PWD/src ;\
python3 -m jiten.jmdict --doctest ;\
python3 -m jiten.kanji --doctest ;\
export PYTHONPATH=$$PWD/src ; \
python3 -m jiten.freq --doctest && \
python3 -m jiten.jmdict --doctest && \
python3 -m jiten.kanji --doctest && \
python3 -m jiten.misc --doctest
49 changes: 42 additions & 7 deletions src/jiten/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,6 @@ def jmdict(ctx, lang, word, max, query): # {{{1
click.echo("query: " + click.style(q, fg = "bright_red"))
click.echo()
for e, rank in J.search(q, lang, max):
if ctx.obj["verbose"]:
click.echo("seq# " + click.style(str(e.seq), fg = "blue")
+ ", freq# " + click.style(str(rank), fg = "cyan"))
click.echo(" | ".join(
click.style(k.elem, fg = "bright_yellow") for k in e.kanji
))
Expand All @@ -74,16 +71,54 @@ def jmdict(ctx, lang, word, max, query): # {{{1
click.echo(click.style("* ", fg = "magenta") + t)
if e.usually_kana():
click.echo("[" + J.USUKANA + "]")
if ctx.obj["verbose"]:
click.echo("seq# " + click.style(str(e.seq), fg = "blue")
+ ", freq# " + click.style(str(rank), fg = "cyan"))
click.echo()
# }}}1

# TODO
@cli.command(help = "Search KanjiDic.")
@click.argument("query")
@click.option("-w", "--word", is_flag = True,
help = "Match whole word (same as \\b...\\b).")
@click.option("-m", "--max", default = None, type = click.INT,
help = "Maximum number of results.")
@click.argument("query", required = False)
@click.pass_context
def kanji(ctx, query): # {{{1
click.echo(click.style("TODO", fg = "red"))
ctx.exit(1)
def kanji(ctx, word, max, query): # {{{1
# Search KanjiDic with K.search() and print every entry found: the
# character, its on/kun/name readings, and its meanings.  When
# ctx.obj["verbose"] is set, the query and a per-entry details line
# are printed as well.
#
# no query argument given: ask for one interactively
q = query or click.prompt("query")
# -w/--word: wrap the pattern in word boundaries
if word: q = "\\b" + q + "\\b"
if ctx.obj["verbose"]:
click.echo("query: " + click.style(q, fg = "bright_red"))
click.echo()
for e in K.search(q, max):
click.echo(e.char)
# one line per kind of reading; an empty join is falsy, so the
# bracketed placeholder is printed when there are no readings
click.echo(" | ".join(
click.style(r, fg = "bright_yellow") for r in e.on
) or "[no on readings]")
click.echo(" | ".join(
click.style(r, fg = "bright_green") for r in e.kun
) or "[no kun readings]")
click.echo(" | ".join(
click.style(r, fg = "cyan") for r in e.nanori
) or "[no name readings]")
# one bullet per meaning
for m in e.meaning:
click.echo(click.style("* ", fg = "magenta") + m)
# details line: code point (hex) and stroke count are always shown;
# grade, frequency, (old) JLPT level, and SKIP code only when the
# entry has a truthy value for them
if ctx.obj["verbose"]:
click.echo(
click.style(hex(ord(e.char)), fg = "blue")
+ ", " + click.style(str(e.strokes), fg = "yellow")
+ " strokes"
+ (", grade " + click.style(e.level, fg = "cyan")
if e.level else "")
+ (", freq# " + click.style(str(e.freq), fg = "magenta")
if e.freq else "")
+ (", old jlpt " + click.style(str(e.jlpt), fg = "blue")
if e.jlpt else "")
+ (", skip " + click.style(e.skip, fg = "yellow")
if e.skip else "")
)
click.echo()
# }}}1

# TODO
Expand Down
22 changes: 17 additions & 5 deletions src/jiten/freq.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,22 @@ def rank_freq(data):

def rank(w): return freq_rank.get(w, NOFREQ)

news_freq = process_freq(parse_freq(NEWSFREQ_FILE, True))
book_freq = process_freq(parse_freq(BOOKFREQ_FILE, False))

freq = merge_freq(news_freq, book_freq)
freq_rank = rank_freq(freq)
def setup():
  """
  Load the frequency data into the module-level tables (once).

  Populates the globals news_freq, book_freq, freq, and freq_rank from
  NEWSFREQ_FILE and BOOKFREQ_FILE.  Loading happens here rather than
  at import time; calls after the first successful one return
  immediately, so callers can invoke setup() unconditionally.
  """
  global news_freq, book_freq, freq, freq_rank
  if setup.done: return

  news_freq = process_freq(parse_freq(NEWSFREQ_FILE, True))
  book_freq = process_freq(parse_freq(BOOKFREQ_FILE, False))

  freq      = merge_freq(news_freq, book_freq)
  freq_rank = rank_freq(freq)

  # only mark as done once everything has loaded, so that a failed
  # load is retried (and fails loudly again) on the next call instead
  # of silently leaving the tables undefined
  setup.done = True
setup.done = False

if __name__ == "__main__":
  # `--doctest`: load the data via setup(), run this module's doctests
  # verbosely, and exit with status 1 if any of them failed
  if "--doctest" in sys.argv:
    setup()
    import doctest
    failed, _attempted = doctest.testmod(verbose = True)
    if failed:
      sys.exit(1)

# vim: set tw=70 sw=2 sts=2 et fdm=marker :
3 changes: 2 additions & 1 deletion src/jiten/jmdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,13 +292,14 @@ def jmdict2sqldb(data, file = SQLITE_FILE): # {{{1
""" # }}}1

def setup():
  """
  Parse JMDict and store the result in the SQLite database.
  """
  # F's data is set up explicitly (it is no longer loaded on import);
  # presumably the parser needs it -- do this before parsing
  F.setup()
  jmdict2sqldb(parse_jmdict())

def search(q, langs = [DLANG], max_results = None, # {{{1
file = SQLITE_FILE):
entries = set()
rx = re.compile(q, re.I)
rx = re.compile(q, re.I | re.M)
mat = lambda x: rx.search(x) is not None
with sqlite_do(file) as c:
c.connection.create_function("matches", 1, mat)
Expand Down
27 changes: 25 additions & 2 deletions src/jiten/kanji.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,32 @@ def setup():
kanjidic = parse_kanjidic()
kanjidic2sqldb(kanjidic)

# TODO
def search(q, max_results = None, file = SQLITE_FILE):          # {{{1
  """
  Search the kanji database; yields an Entry for each matching row.

  If q contains any ideographs (per M.isideo), each of them (after
  M.uniq) is looked up by character and the rest of q is ignored.
  Otherwise q is compiled as a regex (case-insensitive, multi-line)
  and matched against the on, kun, and name readings -- both as
  stored and with "." and "-" removed -- and against the meanings.

  max_results caps the total number of entries yielded; a falsy
  value (None, 0) means no limit.  file is the SQLite database to
  query.
  """

  def ent(r):
    # the first column is skipped; columns 1-7 are passed through
    # as-is and the remaining ones are split into tuples of lines
    return Entry(*(list(r[1:8]) + [ tuple(x.splitlines())
                                    for x in r[8:] ]))

  def limited(rows):
    # yield entries for rows, stopping after max_results (if set)
    for i, r in enumerate(rows):
      if max_results and i >= max_results: break
      yield ent(r)

  ideo = tuple(M.uniq(filter(M.isideo, q)))
  with sqlite_do(file) as c:
    if ideo:
      def by_char():
        for char in ideo:
          yield from c.execute("SELECT * FROM entry WHERE char = ?",
                               (char,))
      # the limit applies to the combined results for all characters,
      # not to each character's lookup separately
      yield from limited(by_char())
    else:
      rx    = re.compile(q, re.I | re.M)
      mat1  = lambda x: rx.search(x) is not None
      # same, but with the "." and "-" markers stripped so that the
      # query need not contain them
      mat2  = lambda x: rx.search(x.replace(".", "")
                                   .replace("-", "")) is not None
      c.connection.create_function("matches1", 1, mat1)
      c.connection.create_function("matches2", 1, mat2)
      yield from limited(c.execute("""
          SELECT * FROM entry WHERE
            matches1(on_) OR matches1(kun) OR matches1(nanori) OR
            matches2(on_) OR matches2(kun) OR matches2(nanori) OR
            matches1(meaning)
          """))
# }}}1

if __name__ == "__main__":
Expand Down
2 changes: 2 additions & 0 deletions src/jiten/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,14 @@
# Code point range predicates; each takes a single character.

# U+3000..U+303F: CJK Symbols and Punctuation
ispunc = lambda c: 0x3000 <= ord(c) <= 0x303f
# U+3040..U+309F: Hiragana
ishiragana = lambda c: 0x3040 <= ord(c) <= 0x309f
# U+30A0..U+30FF: Katakana
iskatakana = lambda c: 0x30a0 <= ord(c) <= 0x30ff

# U+4E00..U+9FAF: within the CJK Unified Ideographs block
iskanji = lambda c: 0x4e00 <= ord(c) <= 0x9faf
# U+F900..U+FAFF: CJK Compatibility Ideographs
iscompat = lambda c: 0xf900 <= ord(c) <= 0xfaff
# U+3400..U+4DBF (CJK Extension A) or U+20000..U+2EBEF (the range
# spanning CJK Extensions B through F)
isuniext = lambda c: 0x3400 <= ord(c) <= 0x4dbf or \
0x20000 <= ord(c) <= 0x2ebef

# hiragana or katakana
iskana = lambda c: ishiragana(c) or iskatakana(c)
# any ideograph: iskanji, compatibility, or extension ranges
isideo = lambda c: iskanji(c) or iscompat(c) or isuniext(c)
# NB: isjap only considers iskanji (not iscompat/isuniext) plus kana
isjap = lambda c: iskanji(c) or iskana(c) # probably
# isjap, or one of the characters in OKPUNC
isokjap = lambda c: isjap(c) or c in OKPUNC # probably

Expand Down

0 comments on commit cd72526

Please sign in to comment.