From 5ca810d224853070f0e2fd724dfe8c3b8ba7ab61 Mon Sep 17 00:00:00 2001 From: Marc Verhagen Date: Thu, 14 Apr 2016 12:20:29 -0400 Subject: [PATCH] The MergerWrapper now cleans out TLINKS before adding the merged ones. This prevents the merger from adding duplicates, it also takes care of stray duplicates in the input as well as nasty links like the reflexive weirdo links like [t1 < t1] that Blinker generated with its broken Timex linking. Related to issues https://github.com/tarsqi/ttk/issues/20 and https://github.com/tarsqi/ttk/issues/16. --- code/components/merging/wrapper.py | 3 +-- code/docmodel/document.py | 32 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/code/components/merging/wrapper.py b/code/components/merging/wrapper.py index e0a7d94..0a49c18 100644 --- a/code/components/merging/wrapper.py +++ b/code/components/merging/wrapper.py @@ -57,6 +57,7 @@ def process(self): self.update_tarsqidoc(cp) def update_tarsqidoc(self, cp): + self.tarsqidoc.remove_tlinks() for n1, rest in cp.graph.edges.items(): for n2, edge in cp.graph.edges[n1].items(): if edge.constraint is not None: @@ -83,8 +84,6 @@ def add_edge(self, edge): attrs[RELATED_TO_TIME] = id2 else: attrs[RELATED_TO_EVENT_INSTANCE] = id2 - # TODO: write this method - # self.tarsqidoc.remove_tlinks() self.tarsqidoc.tags.add_tag(TLINK, -1, -1, attrs) diff --git a/code/docmodel/document.py b/code/docmodel/document.py index 62e86a3..a1299a4 100644 --- a/code/docmodel/document.py +++ b/code/docmodel/document.py @@ -16,28 +16,18 @@ class TarsqiDocument: """An instance of TarsqiDocument should contain all information that may be - needed by the wrappers to do their work. It will contain minimal document - structure in its elements variable, at this point just a list of - TarsqiDocElements. Elements will be typed and include the source string and - a dictionary of tags. + needed by the wrappers to do their work. 
It includes the source, metadata, + processing options, a set of identifier counters and a TagRepository. Instance Variables: - source - instance of DocSource - doctree - instance of TarsqiTree - elements - list of TarsqiDocElements + source - an instance of DocSource metadata - a dictionary options - the Options instance from the Tasqi instance + tags - an instance of TagRepository counters - a set of counters used to create unique identifiers - Note that more variables will be needed. Currently, several wrappers use - data from the Tarsqi instance, should check what these data are and get them - elsewhere, potentially by adding them here. - - Also note that he processing options are available to the wrappers only - through this class by accessing th eoptions variable. - - Also note that we may need a tarsqi_tags variable, to store those tags that - are not internal to any of the elements.""" + Note that the processing options are available to the wrappers only through + this class by accessing the options variable.""" def __init__(self, docsource, metadata): self.source = docsource @@ -110,6 +100,10 @@ def next_link_id(self, link_type): + self.counters['SLINK'] + self.counters['TLINK']) + def remove_tlinks(self): + """Remove all TLINK tags from the tags repository.""" + self.tags.remove_tags(TLINK) + def print_source(self, fname): """Print the original source of the document, without the tags to file fname.""" @@ -328,6 +322,12 @@ def append(self, tag): """Appends an instance of Tag to the tags list.""" self.tags.append(tag) + def remove_tags(self, tagname): + """Remove all tags with name=tagname. Rebuilds the indexes after + removing the tags.""" + self.tags = [t for t in self.tags if t.name != tagname] + self.index() + def merge(self): """Take the OpeningTags and ClosingTags in self.tmp and merge them into Tags. Raise errors if tags do not match."""