diff --git a/pytextrank/base.py b/pytextrank/base.py
index a28b3da..bbd6185 100644
--- a/pytextrank/base.py
+++ b/pytextrank/base.py
@@ -309,7 +309,7 @@ def __init__ (
         # effectively, performs the same work as the `reset()` method;
         # called explicitly here for the sake of type annotations
         self.elapsed_time: float = 0.0
-        self.lemma_graph: nx.DiGraph = nx.DiGraph()
+        self.lemma_graph: nx.Graph = nx.Graph()
         self.phrases: typing.List[Phrase] = []
         self.ranks: typing.Dict[Lemma, float] = {}
         self.seen_lemma: typing.Dict[Lemma, typing.Set[int]] = OrderedDict()
@@ -323,7 +323,7 @@ def reset (
 removing any pre-existing state.
         """
         self.elapsed_time = 0.0
-        self.lemma_graph = nx.DiGraph()
+        self.lemma_graph = nx.Graph()
         self.phrases = []
         self.ranks = {}
         self.seen_lemma = OrderedDict()
@@ -400,7 +400,7 @@ def get_personalization (  # pylint: disable=R0201
 
     def _construct_graph (
         self
-        ) -> nx.DiGraph:
+        ) -> nx.Graph:
         """
 Construct the [*lemma graph*](https://derwen.ai/docs/ptr/glossary/#lemma-graph).
 
@@ -408,7 +408,7 @@ def _construct_graph (
     returns:
 a directed graph representing the lemma graph
         """
-        g = nx.DiGraph()
+        g = nx.Graph()
 
         # add nodes made of Lemma(lemma, pos)
         g.add_nodes_from(self.node_list)
@@ -571,6 +571,8 @@ def _calc_discounted_normalised_rank (
     returns:
 normalized rank metric
         """
+        if len(span) < 1:
+            return 0.0
         non_lemma = len([tok for tok in span if tok.pos_ not in self.pos_kept])
         non_lemma_discount = len(span) / (len(span) + (2.0 * non_lemma) + 1.0)
 
@@ -877,7 +879,7 @@ def write_dot (
     path:
 path for the output file; defaults to `"graph.dot"`
         """
-        dot = graphviz.Digraph()
+        dot = graphviz.Graph()
 
         for lemma in self.lemma_graph.nodes():
             rank = self.ranks[lemma]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 9c90f4b..34da7a8 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -5,6 +5,7 @@ coverage
 flask
 grayskull
 jupyterlab >= 3.1.4
+jupyter-server >= 2.11.2 # not directly required, pinned by Snyk to avoid a vulnerability
 mistune
 mkdocs-git-revision-date-plugin
 mkdocs-material
@@ -22,6 +23,6 @@ pymdown-extensions
 selenium
 setuptools >= 65.5.1
 twine
-wheel >= 0.38.0
-tornado>=6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
-werkzeug>=3.0.1 # not directly required, pinned by Snyk to avoid a vulnerability
+tornado >= 6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
+werkzeug >= 3.0.1 # not directly required, pinned by Snyk to avoid a vulnerability
+wheel >= 0.38.0
\ No newline at end of file
diff --git a/tests/test_base.py b/tests/test_base.py
index 44b78e3..4ad67d4 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -154,13 +154,13 @@ def test_stop_words ():
         for phrase in doc._.phrases[:5]
     ]
 
-    assert "words" in phrases
+    assert "sentences" in phrases
 
     # add `"word": ["NOUN"]` to the *stop words*, to remove instances
     # of `"word"` or `"words"` then see how the ranked phrases differ?
 
     nlp2 = spacy.load("en_core_web_sm")
-    nlp2.add_pipe("textrank", config={ "stopwords": { "word": ["NOUN"] } })
+    nlp2.add_pipe("textrank", config={ "stopwords": { "sentence": ["NOUN"] } })
 
     with open("dat/gen.txt", "r") as f:
         doc = nlp2(f.read())