Bump version to 0.8.1.2 and make `pattern` an optional dependency.
English lemmatization in get_summary is best-effort: if `pattern` cannot be
imported or used, tokens are left unlemmatized and a notice is printed.
Only Exception subclasses are caught, so Ctrl-C / SystemExit still propagate.

diff --git a/harvesttext/__init__.py b/harvesttext/__init__.py
index 77dccad..467220f 100644
--- a/harvesttext/__init__.py
+++ b/harvesttext/__init__.py
@@ -4,7 +4,7 @@
 from .harvesttext import HarvestText
 from .resources import *
 
-__version__ = '0.8.1.1'
+__version__ = '0.8.1.2'
 
 def saveHT(htModel,filename):
     with open(filename, "wb") as f:
diff --git a/harvesttext/summary.py b/harvesttext/summary.py
index 9cdec46..b5a1885 100644
--- a/harvesttext/summary.py
+++ b/harvesttext/summary.py
@@ -29,8 +29,11 @@ def get_summary(self, sents, topK=5, stopwords=None, with_importance=False, stan
         # 使用standard_name,相似度可以基于实体链接的结果计算而更加准确
         sent_tokens = [self.seg(sent.strip(), standard_name=standard_name, stopwords=stopwords) for sent in sents]
         if self.language == "en":
-            from pattern.en import lemma
-            sent_tokens = [[lemma(wd) for wd in sent] for sent in sent_tokens]
+            try:
+                from pattern.en import lemma
+                sent_tokens = [[lemma(wd) for wd in sent] for sent in sent_tokens]
+            except Exception:
+                print(" `pattern` is not installed, so the english words will not be lemmatized, this might slightly hurt the summary quality")
         sent_tokens = [sent for sent in sent_tokens if len(sent) > 0]
         G = nx.Graph()
         for u, v in combinations(range(len(sent_tokens)), 2):
diff --git a/requirements.txt b/requirements.txt
index 4b19c49..6e5a794 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,5 +13,4 @@ python-louvain
 tqdm
 w3lib
 nltk
-opencc-python-reimplemented
-pattern
\ No newline at end of file
+opencc-python-reimplemented
\ No newline at end of file