From 07f242eed8e9c2b51e7ee908d46fc789406adefe Mon Sep 17 00:00:00 2001
From: Yosshi999
Date: Sat, 16 Apr 2022 20:23:42 +0900
Subject: [PATCH] use safer text2mecab (#10)

---
 lib/open_jtalk                       | 2 +-
 pyopenjtalk/openjtalk.pyx            | 9 ++++++++-
 pyopenjtalk/openjtalk/text2mecab.pxd | 2 +-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/lib/open_jtalk b/lib/open_jtalk
index 427cfd7..d74d20a 160000
--- a/lib/open_jtalk
+++ b/lib/open_jtalk
@@ -1 +1 @@
-Subproject commit 427cfd761b78efb6094bea3c5bb8c968f0d711ab
+Subproject commit d74d20ac25d212079acb40fdb7af69f11d38d8cf
diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx
index c83704e..816f655 100644
--- a/pyopenjtalk/openjtalk.pyx
+++ b/pyopenjtalk/openjtalk.pyx
@@ -2,6 +2,7 @@
 # cython: boundscheck=True, wraparound=True
 # cython: c_string_type=unicode, c_string_encoding=ascii
 
+import errno
 import numpy as np
 
 cimport numpy as np
@@ -165,7 +166,13 @@ cdef class OpenJTalk(object):
         if isinstance(text, str):
             text = text.encode("utf-8")
         cdef char buff[8192]
-        text2mecab(buff, text)
+        cdef int result = text2mecab(buff, 8192, text)
+        if result != 0:
+            if result == errno.ERANGE:
+                raise RuntimeError("Text is too long")
+            if result == errno.EINVAL:
+                raise RuntimeError("Invalid input for text2mecab")
+            raise RuntimeError("Unknown error: " + str(result))
         Mecab_analysis(self.mecab, buff)
         mecab2njd(self.njd, Mecab_get_feature(self.mecab), Mecab_get_size(self.mecab))
         _njd.njd_set_pronunciation(self.njd)
diff --git a/pyopenjtalk/openjtalk/text2mecab.pxd b/pyopenjtalk/openjtalk/text2mecab.pxd
index 6081757..3d44553 100644
--- a/pyopenjtalk/openjtalk/text2mecab.pxd
+++ b/pyopenjtalk/openjtalk/text2mecab.pxd
@@ -1,4 +1,4 @@
 # distutils: language = c++
 
 cdef extern from "text2mecab.h":
-    void text2mecab(char *output, const char *input)
+    int text2mecab(char *output, size_t sizeOfOutput, const char *input)