Updated comparing and sorting and fixed some print statements (issue #79

).
tarsqi · Jan 3, 2021 · 8f1bc1d · 8f1bc1d
1 parent a8bc653
commit 8f1bc1d
Show file tree

Hide file tree

Showing 12 changed files with 298 additions and 184 deletions.
diff --git a/components/classifier/vectors.py b/components/classifier/vectors.py
@@ -155,21 +155,6 @@ def make_vector(tarsqidoc, s, tag):
         return None
 
 
-def compare_features(f1, f2):
-    """Comparison method for feature sorting."""
-    def get_prefix(feature):
-        if feature.startswith('e1-'): return 'e1'
-        if feature.startswith('e2-'): return 'e2'
-        if feature.startswith('e-'): return 'e'
-        if feature.startswith('t-'): return 't'
-        return 'x'
-    prefixes = {'e': 1, 't': 2, 'e1': 3, 'e2': 4}
-    p1 = get_prefix(f1)
-    p2 = get_prefix(f2)
-    prefix_comparison = cmp(p1, p2)
-    return cmp(f1, f2) if prefix_comparison == 0 else prefix_comparison
-
-
 def abbreviate(attr):
     """Abbreviate the feature name, but abbreviate only the part without the
     prefix (which can be e-, t-, e1- or e2-)."""
@@ -199,7 +184,7 @@ def __init__(self, tarsqidoc, sentence, source, source_tag, features):
 
     def __str__(self):
         feats = []
-        for feat in self.sorted_features():
+        for feat in sorted(self.features.keys()):
             val = self.features[feat]
             feats.append("%s=%s" % (feat, val))
         return ' '.join(feats)
@@ -210,9 +195,6 @@ def is_event_vector(self):
     def is_timex_vector(self):
         return False
 
-    def sorted_features(self):
-        return sorted(list(self.features.keys()), compare_features)
-
     def add_feature(self, feat, val):
         self.features[abbreviate(feat)] = val
 
@@ -260,7 +242,7 @@ class PairVector(Vector):
 
     def __init__(self, tarsqidoc, prefix1, vector1, prefix2, vector2):
         """Initialize a pair vector from two object vectors by setting an
-        identifier and by adding the features of th eobject vectors."""
+        identifier and by adding the features of the object vectors."""
         self.tarsqidoc = tarsqidoc
         self.filename = self._get_filename()
         self.source = (vector1.source, vector2.source)

diff --git a/components/common_modules/constituent.py b/components/common_modules/constituent.py
@@ -54,14 +54,23 @@ def __reversed__(self):
     def __nonzero__(self):
         return True
 
-    def __cmp__(self, other):
-        # NOTE: in some cases the matchLabel method in FSA.py checks for
-        # equality of a constituent and a string using == in which case this
-        # method is invoked, which would throw an error without the first two
-        # lines
-        if isinstance(other, type('')):
-            return 1
-        return cmp(self.begin, other.begin)
+    def __eq__(self, other):
+        return self.begin == other.begin
+
+    def __ne__(self, other):
+        return self.begin != other.begin
+
+    def __lt__(self, other):
+        return self.begin < other.begin
+
+    def __le__(self, other):
+        return self.begin <= other.begin
+
+    def __gt__(self, other):
+        return self.begin > other.begin
+
+    def __ge__(self, other):
+        return self.begin >= other.begin
 
     def __len__(self):
         """Returns the lenght of the dtrs variable."""

diff --git a/components/merging/sputlink/rules/objects.py b/components/merging/sputlink/rules/objects.py
@@ -1,6 +1,7 @@
 
-
 from __future__ import print_function
+
+
 class ObjectList(object):
     """Class that provides interface for global PointLink and Link data
     bases. Just a wrapper around a list. Assumes that all elements are 
@@ -28,7 +29,6 @@ def __str__(self):
         return returnStr
 
 
-
 class TemporalObject(object):
     """Abstract class that is a super class of all link and node classes. Only
     responsible for providing unique IDs."""
@@ -52,7 +52,6 @@ def addOutLink(self,pointlink):
         self.outLinks.append(pointlink)
         self.isDirty = 1
 
-
 
 class Node(AbstractNode):
 
@@ -76,7 +75,7 @@ def generatePoints(self,environment):
         self.end = Point(environment,self,"end")
         # PLink initialization takes care of adding links to points
         # and to environment
-        self.pointLinks.append(PLink(environment,self.begin,'<',self.end))
+        self.pointLinks.append(PLink(environment, self.begin, '<', self.end))
         self.hasPoints = 1
 
 
@@ -106,7 +105,7 @@ def printVerbosely(self):
 
 class TimexNode(Node):
 
-    def __init__(self,environment,timexString,timexClass,addPoints=1):
+    def __init__(self, environment, timexString, timexClass, addPoints=1):
         Node.__init__(self,environment)
         self.type = 'TIMEX3'
         self.string = timexString
@@ -118,7 +117,7 @@ def isTimex(self): return 1
 
 class Point(AbstractNode):
 
-    def __init__(self,environment,interval,boundary):
+    def __init__(self, environment, interval, boundary):
         self.id = self.newID()
         self.interval = interval
         self.boundary = boundary
@@ -129,20 +128,20 @@ def __init__(self,environment,interval,boundary):
         # give the point a name derived from the name of the interval
         self.string = interval.string
         if boundary == 'begin':
-            self.string = "%s%s" % (self.string,1)
+            self.string = "%s%s" % (self.string, 1)
         if boundary == 'end':
-            self.string = "%s%s" % (self.string,2)
+            self.string = "%s%s" % (self.string, 2)
     def __str__(self):
-        return "Point(%s,%s,%s)" % (self.id,self.interval.string,self.boundary)
+        return "Point(%s,%s,%s)" % (self.id, self.interval.string, self.boundary)
 
     def copy(self,newEnvironment):
         """Only copies the interval and the boundaries, does not copy inlinks
         outlinks and anything else. Creates a new Point with unique id."""
-        return Point(newEnvironment,self.interval,self.boundary)
+        return Point(newEnvironment, self.interval, self.boundary)
 
     def printVerbosely(self):
         print("\nPoint(%s)" % (self.id))
-        print("  %s - %s" % (self.interval,self.boundary))
+        print("  %s - %s" % (self.interval, self.boundary))
         print("  inLinks:")
         for link in self.inLinks: print("      ", link)
         print("  outLinks:")
@@ -151,7 +150,7 @@ def printVerbosely(self):
 
 class AbstractLink(TemporalObject):
 
-    def __init__(self,begin,relation,end):
+    def __init__(self, begin, relation, end):
         """Create a new link. Perhaps add an existency check and
         return None if new link already exists."""
         self.id = self.newID()
@@ -236,12 +235,12 @@ def isTrivial(self):
     def asPrettyString(self):
         return "[%s %s %s]" % (self.begin.string, self.relation, self.end.string)
 
-    def __cmp__(self,other):
-        comparison1 = cmp(self.begin.string,other.begin.string)
-        if comparison1: return comparison1
-        comparison2 = cmp(self.end.string,other.end.string)
-        if comparison2: return comparison2
-        return cmp(self.relation,other.relation)
+#    def XXX__cmp__(self, other):
+#        comparison1 = cmp(self.begin.string, other.begin.string)
+#        if comparison1: return comparison1
+#        comparison2 = cmp(self.end.string, other.end.string)
+#        if comparison2: return comparison2
+#        return cmp(self.relation, other.relation)
 
     def __str__(self):
         return "PLink(%s) [%s.%s %s %s.%s]" % \

diff --git a/components/merging/wrapper.py b/components/merging/wrapper.py
@@ -51,8 +51,8 @@ def process(self):
         add resulting links to the TarsqiDocument."""
         cp = ConstraintPropagator(self.tarsqidoc)
         tlinks = self.tarsqidoc.tags.find_tags(LIBRARY.timeml.TLINK)
-        # use a primitive sort to order the links on how good they are
-        tlinks = sorted(tlinks, compare_links)
+        # order the links on how good they are
+        tlinks = sorted(tlinks, key=sort_on_confidence, reverse=True)
         cp.queue_constraints(self.tarsqidoc.tags.find_tags(LIBRARY.timeml.TLINK))
         cp.propagate_constraints()
         cp.reduce_graph()
@@ -87,28 +87,21 @@ def _add_constraint_to_tarsqidoc(self, edge):
         self.tarsqidoc.tags.add_tag(TLINK, -1, -1, attrs)
 
 
-def compare_links(link1, link2):
-    """Compare the two links and decide which one of them is more likely to be
-    correct. Rather primitive for now. We consider S2T links the best, then links
-    derived by Blinker, then links derived by the classifier. Classifier links
-    themselves are ordered using their classifier-assigned confidence scores."""
-    o1, o2 = link1.attrs[ORIGIN], link2.attrs[ORIGIN]
-    if o1.startswith('S2T'):
-        return 0 if o2.startswith('S2T') else -1
-    elif o1.startswith('BLINKER'):
-        if o2.startswith('S2T'):
-            return 1
-        elif o2.startswith('BLINKER'):
-            return 0
-        elif o2.startswith('CLASSIFIER'):
-            return -1
-    elif o1.startswith('CLASSIFIER'):
-        if o2.startswith('CLASSIFIER'):
-            o1_confidence = float(o1[11:])
-            o2_confidence = float(o2[11:])
-            return cmp(o2_confidence, o1_confidence)
-        else:
-            return 1
+def sort_on_confidence(link):
+    """Sort key for determining how good we think a link is. Rather primitive for
+    now. We consider S2T links the best, then links derived by Blinker, then
+    links derived by the classifier. Classifier links themselves are ordered
+    using their classifier-assigned confidence scores."""
+    origin = link.attrs[ORIGIN]
+    if origin.startswith(S2T):
+        return 3.0
+    elif origin.startswith(BLINKER):
+        return 2.0
+    elif origin.startswith(CLASSIFIER):
+        confidence = float(origin[11:])
+        return 1.0 + confidence
+    else:
+        return 0
 
 
 def tlink_arg1_attr(identifier):

diff --git a/docmodel/document.py b/docmodel/document.py
@@ -568,19 +568,42 @@ def __str__(self):
         return "<Tag %s %s:%s {%s }>" % \
                (self.name, self.begin, self.end, attrs)
 
-    def __cmp__(self, other):
+    # Tag comparisons are based on offsets alone
+
+    def __eq__(self, other):
+        return self.begin == other.begin and self.end == other.end
+
+    def __ne__(self, other):
+        return self.begin != other.begin or self.end != other.end
+
+    def __lt__(self, other):
+        return self._compare(other) < 0
+
+    def __le__(self, other):
+        return self._compare(other) <= 0
+
+    def __gt__(self, other):
+        return self._compare(other) > 0
+
+    def __ge__(self, other):
+        return self._compare(other) >= 0
+
+    def _compare(self, other):
         """Order two Tags based on their begin offset and end offsets. Tags with
         an earlier begin will be ranked before tags with a later begin, with
         equal begins the tag with the higher end will be ranked first. Tags with
         no begin (that is, it is set to -1) will be ordered at the end. The
         order of two tags with the same begin and end is undefined."""
+        # TODO: this is a bit of a cluge and should be revisited later
+        def comp(x, y):
+            return (x > y) - (x < y)
         if self.begin == -1:
             return 1
         if other.begin == -1:
             return -1
-        begin_cmp = cmp(self.begin, other.begin)
-        end_cmp = cmp(other.end, self.end)
-        return end_cmp if begin_cmp == 0 else begin_cmp
+        begin_comp = comp(self.begin, other.begin)
+        end_comp = comp(other.end, self.end)
+        return end_comp if begin_comp == 0 else begin_comp
 
     @staticmethod
     def new_attr(attr, attrs):

diff --git a/docs/notes/python3.md b/docs/notes/python3.md
@@ -618,7 +618,7 @@ Changed one occurence of the next() method.
 
 [https://portingguide.readthedocs.io/en/latest/builtins.html](https://portingguide.readthedocs.io/en/latest/builtins.html)
 
-Ran all the fixers mentioned in the link above.
+Ran all the fixers mentioned in the link above, all changes are in commit [a8bc653b](https://github.com/tarsqi/ttk/commit/a8bc653bc4f6f5a08d060053095e5aabe11d12fa).
 
 Some comments:
 
@@ -627,25 +627,83 @@ Some comments:
 
 
 
+### 3.10. Comparing and sorting
+
+[https://portingguide.readthedocs.io/en/latest/comparisons.html](https://portingguide.readthedocs.io/en/latest/comparisons.html)
+[http://python3porting.com/preparing.html](http://python3porting.com/preparing.html)
+
+All manual changes, searching for uses of `cmp` and `__cmp__`.
+
+&para; `components/classifier/vectors.py`
+
+Sorting was done for printing the vectors and used a couple of helper functions, but it looks like a simple sort on the keys worked the same so I removed the code that did the comparison.
+
+&para; `components/common_modules/constituent.py`
+
+This is more complicated because the `Consituent.__cmp__` was set up to allow the FSA module to compare  objects of different types:
+
+```python
+def __cmp__(self, other):
+    # NOTE: in some cases the matchLabel method in FSA.py checks for
+    # equality of a constituent and a string using == in which case this
+    # method is invoked, which would throw an error without the first two
+    # lines
+    if isinstance(other, type('')):
+        return 1
+    return cmp(self.begin, other.begin)
+```
+Now it looks like that comment was nonsense. In fact, there is no `matchLabel` method, but there is a `labelMatches` method, and the above function never appears to be called from the latter. Also, the above method is called from *_distributeNode_V* in the *features* module or from *_get_tag_vectors_for_sentence* in the *vectors* module, so no FSA origin here.
+
+Anyway, replaced `__cmp__` with the rich comparison operators.
+
+Then somehow an error with the FSA did show up when using the equals operator. Turns out the label can be 'ALL' and then the comparison chokes. Edited labelMatches so that if the label is ALL the match always succeeds. This may be wrong, but no tests broke.
+
+&para; `components/merging/sputlink/rules/objects.py`
+
+Use of comparison for Point Links does not appear to be used in runtime code, so commented it out for now. TODO: when running *generateRules* comparisons may be needed.
+
+&para; `components/merging/wrapper.py`
+
+Somewhat tricky since the comparison function parses the origin field of the link and looks at the component name and the associated confidence score if the component is the classifier. Cannot just give the origin as the key. Used key function that generates a floeat from the origin attribute.
+
+&para; `docmodel/document.py` and `testing/evaluate.py`
+
+Used rich comparison operators to replace `__cmp__`, but used the contents of the old comparison method in a utility method.
+
+&para; `utilities/convert.py`
+
+Replaced comparison operator with rich comparison operators.
+
+&para; `utilities/make_documentation.py`
+
+Replaced cmp arugment with key argument on sort. Also needed to make a lot of string explicitly unidoce strings.
+
+&para; `utilities/wordnet.py`
+
+Could have dealt with uses of `cmp` by using `comp` and define the latter as before and with uses of `__cmp__` by doing the `_compare` thing as with some of the others above.
+
+In fact, I did nothing, just commented out methods and functions that used `cmp` or `__cmp__`. This module is not used in the runtime version and will be retired.
+
 
 
 ## 4.  Remaining thingies
 
 List of steps still remaining.
 
-- comparing and sorting, including rich comparison operators
-  - [https://portingguide.readthedocs.io/en/latest/comparisons.html](https://portingguide.readthedocs.io/en/latest/comparisons.html)
-  - [http://python3porting.com/preparing.html](http://python3porting.com/preparing.html)
 - Other core object changes
-   - [https://portingguide.readthedocs.io/en/latest/core-obj-misc.html](https://portingguide.readthedocs.io/en/latest/core-obj-misc.html)
+  - [https://portingguide.readthedocs.io/en/latest/core-obj-misc.html](https://portingguide.readthedocs.io/en/latest/core-obj-misc.html)
 - Other changes
    - [https://portingguide.readthedocs.io/en/latest/etc.html](https://portingguide.readthedocs.io/en/latest/etc.html)
 
 After that we can run `python-modernize` ,`pylint --py3k` and `python -3` when running tarsqi.py and the testing and regression scripts.
 
 Maybe check [http://python3porting.com/stdlib.html#removed-modules](http://python3porting.com/stdlib.html#removed-modules).
 
-Followed by `2to3`.
+Run make_documentation.py.
+
+Version and save last Python2 compliant version.
+
+Run `2to3`.
 
 Remove from future imports and calls to six library.