Skip to content

Commit

Permalink
initial version.
Browse files Browse the repository at this point in the history
  • Loading branch information
cary-rowen committed Dec 13, 2021
1 parent 64aa3bb commit 29b1b9b
Show file tree
Hide file tree
Showing 30 changed files with 1,549,397 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Set default behaviour, in case users don't have core.autocrlf set.
* text=auto

# Try to ensure that po files in the repo do not include
# source code line numbers.
# Every person expected to commit po files should change their personal config file as described here:
# https://mail.gnome.org/archives/kupfer-list/2010-June/msg00002.html
*.po filter=cleanpo
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
addon/doc/*.css
addon/doc/en/
*_docHandler.py
*.html
*.ini
*.mo
*.pot
*.py[co]
*.nvda-addon
.sconsign.dblite
340 changes: 340 additions & 0 deletions COPYING.txt

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions addon/globalPlugins/Pinyin2Hanzi/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from __future__ import absolute_import

from .interface import AbstractHmmParams, AbstractDagParams
from .implement import DefaultHmmParams, DefaultDagParams
from .priorityset import Item, PrioritySet
from .util import is_chinese, remove_tone, normlize_pinyin, simplify_pinyin, is_pinyin, all_pinyin


from .dag import dag
from .viterbi import viterbi
54 changes: 54 additions & 0 deletions addon/globalPlugins/Pinyin2Hanzi/dag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# coding: utf-8
from __future__ import (print_function, unicode_literals, absolute_import)

from .interface import AbstractDagParams
from .priorityset import PrioritySet
from .util import xrange
import math

def dag(dag_params, pinyin_list, path_num=6, log=False):
    """Build scored phrase paths that cover the whole of ``pinyin_list``.

    One ``PrioritySet`` is kept per position; the set at index ``i`` collects
    paths whose phrases together cover ``pinyin_list[0:i + 1]``.  Paths are
    seeded with phrases starting at the first syllable, then every stored
    path ending at ``start - 1`` is extended with each phrase returned for
    ``pinyin_list[start:end + 1]``.

    Args:
        dag_params: An ``AbstractDagParams`` instance (checked with
            ``assert``).  Its ``get_phrase(pinyins, num=...)`` results are
            iterated and indexed: index 0 is appended to the path, index 1
            is used as the phrase score.
        pinyin_list: Sequence of pinyin syllables; it is sliced and its
            length is taken.
        path_num: Passed to every ``PrioritySet`` constructor and as ``num``
            to ``get_phrase``.  Presumably the number of candidates kept
            per position -- confirm against ``PrioritySet``.
        log: When true, path scores are sums of ``math.log`` of the phrase
            scores; otherwise they are products of the raw phrase scores.

    Returns:
        ``[]`` when ``pinyin_list`` is empty; otherwise the items stored for
        the last position, sorted by ``score`` in descending order.  Each
        item exposes ``path`` and ``score`` attributes.
    """
    assert isinstance(dag_params, AbstractDagParams)

    pinyin_num = len(pinyin_list)
    if not pinyin_num:
        return []

    paths_ending_at = [PrioritySet(path_num) for _ in xrange(pinyin_num)]

    # Seed step: phrases that begin at the first syllable start a new path.
    for end in xrange(0, pinyin_num):
        seeds = dag_params.get_phrase(pinyin_list[0:end + 1], num=path_num)
        for entry in seeds:
            first_step = [entry[0]]
            seed_score = math.log(entry[1]) if log else entry[1]
            paths_ending_at[end].put(seed_score, first_step)

    # Extension step: grow every path that stops just before ``start``.
    # ``end >= start`` here, so the set being iterated is never the one
    # being written to.
    for start in xrange(1, pinyin_num):
        predecessors = paths_ending_at[start - 1]
        for end in xrange(start, pinyin_num):
            candidates = dag_params.get_phrase(
                pinyin_list[start:end + 1], num=path_num
            )
            for prefix in predecessors:
                for entry in candidates:
                    extended = prefix.path + [entry[0]]
                    total = (
                        prefix.score + math.log(entry[1])
                        if log
                        else prefix.score * entry[1]
                    )
                    paths_ending_at[end].put(total, extended)

    # Only paths that reach the final syllable cover the full input.
    return sorted(
        paths_ending_at[-1],
        key=lambda found: found.score,
        reverse=True,
    )








Loading

0 comments on commit 29b1b9b

Please sign in to comment.