diff --git a/components/classifier/vectors.py b/components/classifier/vectors.py index 9c5765e..825f94c 100644 --- a/components/classifier/vectors.py +++ b/components/classifier/vectors.py @@ -155,21 +155,6 @@ def make_vector(tarsqidoc, s, tag): return None -def compare_features(f1, f2): - """Comparison method for feature sorting.""" - def get_prefix(feature): - if feature.startswith('e1-'): return 'e1' - if feature.startswith('e2-'): return 'e2' - if feature.startswith('e-'): return 'e' - if feature.startswith('t-'): return 't' - return 'x' - prefixes = {'e': 1, 't': 2, 'e1': 3, 'e2': 4} - p1 = get_prefix(f1) - p2 = get_prefix(f2) - prefix_comparison = cmp(p1, p2) - return cmp(f1, f2) if prefix_comparison == 0 else prefix_comparison - - def abbreviate(attr): """Abbreviate the feature name, but abbreviate only the part without the prefix (which can be e-, t-, e1- or e2-).""" @@ -199,7 +184,7 @@ def __init__(self, tarsqidoc, sentence, source, source_tag, features): def __str__(self): feats = [] - for feat in self.sorted_features(): + for feat in sorted(self.features.keys()): val = self.features[feat] feats.append("%s=%s" % (feat, val)) return ' '.join(feats) @@ -210,9 +195,6 @@ def is_event_vector(self): def is_timex_vector(self): return False - def sorted_features(self): - return sorted(list(self.features.keys()), compare_features) - def add_feature(self, feat, val): self.features[abbreviate(feat)] = val @@ -260,7 +242,7 @@ class PairVector(Vector): def __init__(self, tarsqidoc, prefix1, vector1, prefix2, vector2): """Initialize a pair vector from two object vectors by setting an - identifier and by adding the features of th eobject vectors.""" + identifier and by adding the features of the object vectors.""" self.tarsqidoc = tarsqidoc self.filename = self._get_filename() self.source = (vector1.source, vector2.source) diff --git a/components/common_modules/constituent.py b/components/common_modules/constituent.py index ed50fe0..9a84f52 100644 --- a/components/common_modules/constituent.py +++ b/components/common_modules/constituent.py @@ -54,14 +54,23 @@ def __reversed__(self): def __nonzero__(self): return True - def __cmp__(self, other): - # NOTE: in some cases the matchLabel method in FSA.py checks for - # equality of a constituent and a string using == in which case this - # method is invoked, which would throw an error without the first two - # lines - if isinstance(other, type('')): - return 1 - return cmp(self.begin, other.begin) + def __eq__(self, other): + return self.begin == other.begin + + def __ne__(self, other): + return self.begin != other.begin + + def __lt__(self, other): + return self.begin < other.begin + + def __le__(self, other): + return self.begin <= other.begin + + def __gt__(self, other): + return self.begin > other.begin + + def __ge__(self, other): + return self.begin >= other.begin def __len__(self): """Returns the lenght of the dtrs variable.""" diff --git a/components/merging/sputlink/rules/objects.py b/components/merging/sputlink/rules/objects.py index f2a86c5..cb62568 100644 --- a/components/merging/sputlink/rules/objects.py +++ b/components/merging/sputlink/rules/objects.py @@ -1,6 +1,7 @@ - from __future__ import print_function + + class ObjectList(object): """Class that provides interface for global PointLink and Link data bases. Just a wrapper around a list. Assumes that all elements are @@ -28,7 +29,6 @@ def __str__(self): return returnStr - class TemporalObject(object): """Abstract class that is a super class of all link and node classes. Only responsible for providing unique IDs.""" @@ -52,7 +52,6 @@ def addOutLink(self,pointlink): self.outLinks.append(pointlink) self.isDirty = 1 - class Node(AbstractNode): @@ -76,7 +75,7 @@ def generatePoints(self,environment): self.end = Point(environment,self,"end") # PLink initialization takes care of adding links to points # and to environment - self.pointLinks.append(PLink(environment,self.begin,'<',self.end)) + self.pointLinks.append(PLink(environment, self.begin, '<', self.end)) self.hasPoints = 1 @@ -106,7 +105,7 @@ def printVerbosely(self): class TimexNode(Node): - def __init__(self,environment,timexString,timexClass,addPoints=1): + def __init__(self, environment, timexString, timexClass, addPoints=1): Node.__init__(self,environment) self.type = 'TIMEX3' self.string = timexString @@ -118,7 +117,7 @@ def isTimex(self): return 1 class Point(AbstractNode): - def __init__(self,environment,interval,boundary): + def __init__(self, environment, interval, boundary): self.id = self.newID() self.interval = interval self.boundary = boundary @@ -129,20 +128,20 @@ def __init__(self,environment,interval,boundary): # give the point a name derived from the name of the interval self.string = interval.string if boundary == 'begin': - self.string = "%s%s" % (self.string,1) + self.string = "%s%s" % (self.string, 1) if boundary == 'end': - self.string = "%s%s" % (self.string,2) + self.string = "%s%s" % (self.string, 2) def __str__(self): - return "Point(%s,%s,%s)" % (self.id,self.interval.string,self.boundary) + return "Point(%s,%s,%s)" % (self.id, self.interval.string, self.boundary) def copy(self,newEnvironment): """Only copies the interval and the boundaries, does not copy inlinks outlinks and anything else. Creates a new Point with unique id.""" - return Point(newEnvironment,self.interval,self.boundary) + return Point(newEnvironment, self.interval, self.boundary) def printVerbosely(self): print("\nPoint(%s)" % (self.id)) - print(" %s - %s" % (self.interval,self.boundary)) + print(" %s - %s" % (self.interval, self.boundary)) print(" inLinks:") for link in self.inLinks: print(" ", link) print(" outLinks:") @@ -151,7 +150,7 @@ def printVerbosely(self): class AbstractLink(TemporalObject): - def __init__(self,begin,relation,end): + def __init__(self, begin, relation, end): """Create a new link. Perhaps add an existency check and return None if new link already exists.""" self.id = self.newID() @@ -236,12 +235,12 @@ def isTrivial(self): def asPrettyString(self): return "[%s %s %s]" % (self.begin.string, self.relation, self.end.string) - def __cmp__(self,other): - comparison1 = cmp(self.begin.string,other.begin.string) - if comparison1: return comparison1 - comparison2 = cmp(self.end.string,other.end.string) - if comparison2: return comparison2 - return cmp(self.relation,other.relation) +# def XXX__cmp__(self, other): +# comparison1 = cmp(self.begin.string, other.begin.string) +# if comparison1: return comparison1 +# comparison2 = cmp(self.end.string, other.end.string) +# if comparison2: return comparison2 +# return cmp(self.relation, other.relation) def __str__(self): return "PLink(%s) [%s.%s %s %s.%s]" % \ diff --git a/components/merging/wrapper.py b/components/merging/wrapper.py index 0a80e00..08e6098 100644 --- a/components/merging/wrapper.py +++ b/components/merging/wrapper.py @@ -51,8 +51,8 @@ def process(self): add resulting links to the TarsqiDocument.""" cp = ConstraintPropagator(self.tarsqidoc) tlinks = self.tarsqidoc.tags.find_tags(LIBRARY.timeml.TLINK) - # use a primitive sort to order the links on how good they are - tlinks = sorted(tlinks, compare_links) + # order the links on how good they are + tlinks = sorted(tlinks, key=sort_on_confidence, reverse=True) cp.queue_constraints(self.tarsqidoc.tags.find_tags(LIBRARY.timeml.TLINK)) cp.propagate_constraints() cp.reduce_graph() @@ -87,28 +87,21 @@ def _add_constraint_to_tarsqidoc(self, edge): self.tarsqidoc.tags.add_tag(TLINK, -1, -1, attrs) -def compare_links(link1, link2): - """Compare the two links and decide which one of them is more likely to be - correct. Rather primitive for now. We consider S2T links the best, then links - derived by Blinker, then links derived by the classifier. Classifier links - themselves are ordered using their classifier-assigned confidence scores.""" - o1, o2 = link1.attrs[ORIGIN], link2.attrs[ORIGIN] - if o1.startswith('S2T'): - return 0 if o2.startswith('S2T') else -1 - elif o1.startswith('BLINKER'): - if o2.startswith('S2T'): - return 1 - elif o2.startswith('BLINKER'): - return 0 - elif o2.startswith('CLASSIFIER'): - return -1 - elif o1.startswith('CLASSIFIER'): - if o2.startswith('CLASSIFIER'): - o1_confidence = float(o1[11:]) - o2_confidence = float(o2[11:]) - return cmp(o2_confidence, o1_confidence) - else: - return 1 +def sort_on_confidence(link): + """Sort key for determining how good we think a link is. Rather primitive for + now. We consider S2T links the best, then links derived by Blinker, then + links derived by the classifier. Classifier links themselves are ordered + using their classifier-assigned confidence scores.""" + origin = link.attrs[ORIGIN] + if origin.startswith(S2T): + return 3.0 + elif origin.startswith(BLINKER): + return 2.0 + elif origin.startswith(CLASSIFIER): + confidence = float(origin[11:]) + return 1.0 + confidence + else: + return 0 def tlink_arg1_attr(identifier): diff --git a/docmodel/document.py b/docmodel/document.py index efcc24a..9fb72f2 100644 --- a/docmodel/document.py +++ b/docmodel/document.py @@ -568,19 +568,42 @@ def __str__(self): return "" % \ (self.name, self.begin, self.end, attrs) - def __cmp__(self, other): + # Tag comparisons are based on offsets alone + + def __eq__(self, other): + return self.begin == other.begin and self.end == other.end + + def __ne__(self, other): + return self.begin != other.begin or self.end != other.end + + def __lt__(self, other): + return self._compare(other) < 0 + + def __le__(self, other): + return self._compare(other) <= 0 + + def __gt__(self, other): + return self._compare(other) > 0 + + def __ge__(self, other): + return self._compare(other) >= 0 + + def _compare(self, other): """Order two Tags based on their begin offset and end offsets. Tags with an earlier begin will be ranked before tags with a later begin, with equal begins the tag with the higher end will be ranked first. Tags with no begin (that is, it is set to -1) will be ordered at the end. The order of two tags with the same begin and end is undefined.""" + # TODO: this is a bit of a cluge and should be revisited later + def comp(x, y): + return (x > y) - (x < y) if self.begin == -1: return 1 if other.begin == -1: return -1 - begin_cmp = cmp(self.begin, other.begin) - end_cmp = cmp(other.end, self.end) - return end_cmp if begin_cmp == 0 else begin_cmp + begin_comp = comp(self.begin, other.begin) + end_comp = comp(other.end, self.end) + return end_comp if begin_comp == 0 else begin_comp @staticmethod def new_attr(attr, attrs): diff --git a/docs/notes/python3.md b/docs/notes/python3.md index 480f9a8..4052958 100644 --- a/docs/notes/python3.md +++ b/docs/notes/python3.md @@ -618,7 +618,7 @@ Changed one occurence of the next() method. [https://portingguide.readthedocs.io/en/latest/builtins.html](https://portingguide.readthedocs.io/en/latest/builtins.html) -Ran all the fixers mentioned in the link above. +Ran all the fixers mentioned in the link above, all changes are in commit [a8bc653b](https://github.com/tarsqi/ttk/commit/a8bc653bc4f6f5a08d060053095e5aabe11d12fa). Some comments: @@ -627,17 +627,71 @@ Some comments: +### 3.10. Comparing and sorting + +[https://portingguide.readthedocs.io/en/latest/comparisons.html](https://portingguide.readthedocs.io/en/latest/comparisons.html) +[http://python3porting.com/preparing.html](http://python3porting.com/preparing.html) + +All manual changes, searching for uses of `cmp` and `__cmp__`. + +¶ `components/classifier/vectors.py` + +Sorting was done for printing the vectors and used a couple of helper functions, but it looks like a simple sort on the keys worked the same so I removed the code that did the comparison. + +¶ `components/common_modules/constituent.py` + +This is more complicated because the `Consituent.__cmp__` was set up to allow the FSA module to compare objects of different types: + +```python +def __cmp__(self, other): + # NOTE: in some cases the matchLabel method in FSA.py checks for + # equality of a constituent and a string using == in which case this + # method is invoked, which would throw an error without the first two + # lines + if isinstance(other, type('')): + return 1 + return cmp(self.begin, other.begin) +``` +Now it looks like that comment was nonsense. In fact, there is no `matchLabel` method, but there is a `labelMatches` method, and the above function never appears to be called from the latter. Also, the above method is called from *_distributeNode_V* in the *features* module or from *_get_tag_vectors_for_sentence* in the *vectors* module, so no FSA origin here. + +Anyway, replaced `__cmp__` with the rich comparison operators. + +Then somehow an error with the FSA did show up when using the equals operator. Turns out the label can be 'ALL' and then the comparison chokes. Edited labelMatches so that if the label is ALL the match always succeeds. This may be wrong, but no tests broke. + +¶ `components/merging/sputlink/rules/objects.py` + +Use of comparison for Point Links does not appear to be used in runtime code, so commented it out for now. TODO: when running *generateRules* comparisons may be needed. + +¶ `components/merging/wrapper.py` + +Somewhat tricky since the comparison function parses the origin field of the link and looks at the component name and the associated confidence score if the component is the classifier. Cannot just give the origin as the key. Used key function that generates a floeat from the origin attribute. + +¶ `docmodel/document.py` and `testing/evaluate.py` + +Used rich comparison operators to replace `__cmp__`, but used the contents of the old comparison method in a utility method. + +¶ `utilities/convert.py` + +Replaced comparison operator with rich comparison operators. + +¶ `utilities/make_documentation.py` + +Replaced cmp arugment with key argument on sort. Also needed to make a lot of string explicitly unidoce strings. + +¶ `utilities/wordnet.py` + +Could have dealt with uses of `cmp` by using `comp` and define the latter as before and with uses of `__cmp__` by doing the `_compare` thing as with some of the others above. + +In fact, I did nothing, just commented out methods and functions that used `cmp` or `__cmp__`. This module is not used in the runtime version and will be retired. + ## 4. Remaining thingies List of steps still remaining. -- comparing and sorting, including rich comparison operators - - [https://portingguide.readthedocs.io/en/latest/comparisons.html](https://portingguide.readthedocs.io/en/latest/comparisons.html) - - [http://python3porting.com/preparing.html](http://python3porting.com/preparing.html) - Other core object changes - - [https://portingguide.readthedocs.io/en/latest/core-obj-misc.html](https://portingguide.readthedocs.io/en/latest/core-obj-misc.html) + - [https://portingguide.readthedocs.io/en/latest/core-obj-misc.html](https://portingguide.readthedocs.io/en/latest/core-obj-misc.html) - Other changes - [https://portingguide.readthedocs.io/en/latest/etc.html](https://portingguide.readthedocs.io/en/latest/etc.html) @@ -645,7 +699,11 @@ After that we can run `python-modernize` ,`pylint --py3k` and `python -3` when r Maybe check [http://python3porting.com/stdlib.html#removed-modules](http://python3porting.com/stdlib.html#removed-modules). -Followed by `2to3`. +Run make_documentation.py. + +Version and save last Python2 compliant version. + +Run `2to3`. Remove from future imports and calls to six library. diff --git a/testing/evaluate.py b/testing/evaluate.py index 11ede34..e1c2aa9 100644 --- a/testing/evaluate.py +++ b/testing/evaluate.py @@ -264,7 +264,7 @@ def _retrieve_from_index(identifier, tagtype, event_idx, timex_idx): def precision(tp, fp): try: - return (tp / (tp + fp) + return (tp / (tp + fp)) except ZeroDivisionError: return None @@ -640,14 +640,40 @@ def __init__(self, offsets, attrs): def __str__(self): return "" % (self.begin, self.end, self.attrs) - def __cmp__(self, other): - begin_cmp = cmp(self.begin, other.begin) - if begin_cmp != 0: - return begin_cmp - end_cmp = cmp(self.end, other.end) - if end_cmp != 0: - return end_cmp - return cmp(self.begin_head, other.begin_head) + def __eq__(self, other): + return (self.begin == other.begin) \ + and (self.end == other.end) \ + and (self.begin_head == other.begin_head) + + def __ne__(self, other): + return (self.begin != other.begin) \ + or (self.end != other.end) \ + or (self.begin_head != other.begin_head) + + def __lt__(self, other): + return self._compare(other) < 0 + + def __le__(self, other): + return self._compare(other) <= 0 + + def __gt__(self, other): + return self._compare(other) > 0 + + def __ge__(self, other): + return self._compare(other) >= 0 + + def _compare(self, other): + # TODO: revisit this later, it is Python3 compliant, but that's about + # the best you can say + def comp(x, y): + return (x > y) - (x < y) + begin_comp = comp(self.begin, other.begin) + if begin_comp != 0: + return begin_comp + end_comp = comp(self.end, other.end) + if end_comp != 0: + return end_comp + return comp(self.begin_head, other.begin_head) def overlaps_with(self, other): return not (self.end <= other.begin or other.end <= self.begin) diff --git a/utilities/FSA.py b/utilities/FSA.py index e9207be..b4f0fbf 100644 --- a/utilities/FSA.py +++ b/utilities/FSA.py @@ -1325,7 +1325,9 @@ def labelMatches(label, input): #print "LABEL MATCHES: "+label+" "+ str(input)+"?" #logger.out('label =', label) #logger.out('input =', input.__class__.__name__) - if (type(label) is StringType and (label[0] == '{' or label[-1] == '}')): + if label == 'ALL': + return True + elif (type(label) is StringType and (label[0] == '{' or label[-1] == '}')): """Pattern expression has been given in a Python dictionary format """ #print ">>> LABEL %s is string with curly brackets" % label label = labelDict(label) diff --git a/utilities/convert.py b/utilities/convert.py index c443645..c572a68 100644 --- a/utilities/convert.py +++ b/utilities/convert.py @@ -522,8 +522,23 @@ def __init__(self, annotation): self.attributes = {} self.relations = [] - def __cmp__(self, other): - return cmp(self.start, other.start) + def __eq__(self, other): + return self.start == other.start + + def __ne__(self, other): + return self.start != other.start + + def __lt__(self, other): + return self.start < other.start + + def __le__(self, other): + return self.start <= other.start + + def __gt__(self, other): + return self.start > other.start + + def __ge__(self, other): + return self.start >= other.start def __str__(self): return "" \ diff --git a/utilities/find.py b/utilities/find.py index 94a2ce4..8ae2692 100644 --- a/utilities/find.py +++ b/utilities/find.py @@ -13,10 +13,12 @@ def search_file(name, search_term): line_number = 0 for line in fh: line_number += 1 - line = line.rstrip() + line = line.strip() + if line.startswith('#'): + continue if search_term in line: loc = "%s:%d" % (name, line_number) - print(("%-30s == %s" % (loc, line.strip()))) + print(("%-30s == %s" % (loc, line))) for root, dirs, files in os.walk(".", topdown=False): diff --git a/utilities/make_documentation.py b/utilities/make_documentation.py index b96a893..1618fa2 100644 --- a/utilities/make_documentation.py +++ b/utilities/make_documentation.py @@ -54,7 +54,7 @@ DOCUMENTATION_DIR = os.path.join('docs', 'code') -javascript_code = """