Skip to content

Commit

Permalink
use safer text2mecab (r9y9#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
Yosshi999 authored Apr 16, 2022
1 parent a85521a commit 07f242e
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 3 deletions.
9 changes: 8 additions & 1 deletion pyopenjtalk/openjtalk.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# cython: boundscheck=True, wraparound=True
# cython: c_string_type=unicode, c_string_encoding=ascii

+import errno
import numpy as np

cimport numpy as np
Expand Down Expand Up @@ -165,7 +166,13 @@ cdef class OpenJTalk(object):
         if isinstance(text, str):
             text = text.encode("utf-8")
         cdef char buff[8192]
-        text2mecab(buff, text)
+        cdef int result = text2mecab(buff, 8192, text)
+        if result != 0:
+            if result == errno.ERANGE:
+                raise RuntimeError("Text is too long")
+            if result == errno.EINVAL:
+                raise RuntimeError("Invalid input for text2mecab")
+            raise RuntimeError("Unknown error: " + str(result))
         Mecab_analysis(self.mecab, buff)
         mecab2njd(self.njd, Mecab_get_feature(self.mecab), Mecab_get_size(self.mecab))
         _njd.njd_set_pronunciation(self.njd)
Expand Down
2 changes: 1 addition & 1 deletion pyopenjtalk/openjtalk/text2mecab.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# distutils: language = c++

cdef extern from "text2mecab.h":
-    void text2mecab(char *output, const char *input)
+    int text2mecab(char *output, size_t sizeOfOutput, const char *input)

0 comments on commit 07f242e

Please sign in to comment.