Skip to content

Commit

Permalink
Updated comparing and sorting and fixed some print statements (issue #79
Browse files Browse the repository at this point in the history
).
  • Loading branch information
marcverhagen committed Jan 3, 2021
1 parent a8bc653 commit 8f1bc1d
Show file tree
Hide file tree
Showing 12 changed files with 298 additions and 184 deletions.
22 changes: 2 additions & 20 deletions components/classifier/vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,21 +155,6 @@ def make_vector(tarsqidoc, s, tag):
return None


def compare_features(f1, f2):
"""Comparison method for feature sorting."""
def get_prefix(feature):
if feature.startswith('e1-'): return 'e1'
if feature.startswith('e2-'): return 'e2'
if feature.startswith('e-'): return 'e'
if feature.startswith('t-'): return 't'
return 'x'
prefixes = {'e': 1, 't': 2, 'e1': 3, 'e2': 4}
p1 = get_prefix(f1)
p2 = get_prefix(f2)
prefix_comparison = cmp(p1, p2)
return cmp(f1, f2) if prefix_comparison == 0 else prefix_comparison


def abbreviate(attr):
"""Abbreviate the feature name, but abbreviate only the part without the
prefix (which can be e-, t-, e1- or e2-)."""
Expand Down Expand Up @@ -199,7 +184,7 @@ def __init__(self, tarsqidoc, sentence, source, source_tag, features):

def __str__(self):
feats = []
for feat in self.sorted_features():
for feat in sorted(self.features.keys()):
val = self.features[feat]
feats.append("%s=%s" % (feat, val))
return ' '.join(feats)
Expand All @@ -210,9 +195,6 @@ def is_event_vector(self):
def is_timex_vector(self):
return False

def sorted_features(self):
return sorted(list(self.features.keys()), compare_features)

def add_feature(self, feat, val):
self.features[abbreviate(feat)] = val

Expand Down Expand Up @@ -260,7 +242,7 @@ class PairVector(Vector):

def __init__(self, tarsqidoc, prefix1, vector1, prefix2, vector2):
"""Initialize a pair vector from two object vectors by setting an
identifier and by adding the features of th eobject vectors."""
identifier and by adding the features of the object vectors."""
self.tarsqidoc = tarsqidoc
self.filename = self._get_filename()
self.source = (vector1.source, vector2.source)
Expand Down
25 changes: 17 additions & 8 deletions components/common_modules/constituent.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,23 @@ def __reversed__(self):
def __nonzero__(self):
return True

def __cmp__(self, other):
# NOTE: in some cases the matchLabel method in FSA.py checks for
# equality of a constituent and a string using == in which case this
# method is invoked, which would throw an error without the first two
# lines
if isinstance(other, type('')):
return 1
return cmp(self.begin, other.begin)
def __eq__(self, other):
return self.begin == other.begin

def __ne__(self, other):
return self.begin != other.begin

def __lt__(self, other):
return self.begin < other.begin

def __le__(self, other):
return self.begin <= other.begin

def __gt__(self, other):
return self.begin > other.begin

def __ge__(self, other):
return self.begin >= other.begin

def __len__(self):
"""Returns the lenght of the dtrs variable."""
Expand Down
35 changes: 17 additions & 18 deletions components/merging/sputlink/rules/objects.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@


from __future__ import print_function


class ObjectList(object):
"""Class that provides interface for global PointLink and Link data
bases. Just a wrapper around a list. Assumes that all elements are
Expand Down Expand Up @@ -28,7 +29,6 @@ def __str__(self):
return returnStr



class TemporalObject(object):
"""Abstract class that is a super class of all link and node classes. Only
responsible for providing unique IDs."""
Expand All @@ -52,7 +52,6 @@ def addOutLink(self,pointlink):
self.outLinks.append(pointlink)
self.isDirty = 1



class Node(AbstractNode):

Expand All @@ -76,7 +75,7 @@ def generatePoints(self,environment):
self.end = Point(environment,self,"end")
# PLink initialization takes care of adding links to points
# and to environment
self.pointLinks.append(PLink(environment,self.begin,'<',self.end))
self.pointLinks.append(PLink(environment, self.begin, '<', self.end))
self.hasPoints = 1


Expand Down Expand Up @@ -106,7 +105,7 @@ def printVerbosely(self):

class TimexNode(Node):

def __init__(self,environment,timexString,timexClass,addPoints=1):
def __init__(self, environment, timexString, timexClass, addPoints=1):
Node.__init__(self,environment)
self.type = 'TIMEX3'
self.string = timexString
Expand All @@ -118,7 +117,7 @@ def isTimex(self): return 1

class Point(AbstractNode):

def __init__(self,environment,interval,boundary):
def __init__(self, environment, interval, boundary):
self.id = self.newID()
self.interval = interval
self.boundary = boundary
Expand All @@ -129,20 +128,20 @@ def __init__(self,environment,interval,boundary):
# give the point a name derived from the name of the interval
self.string = interval.string
if boundary == 'begin':
self.string = "%s%s" % (self.string,1)
self.string = "%s%s" % (self.string, 1)
if boundary == 'end':
self.string = "%s%s" % (self.string,2)
self.string = "%s%s" % (self.string, 2)
def __str__(self):
return "Point(%s,%s,%s)" % (self.id,self.interval.string,self.boundary)
return "Point(%s,%s,%s)" % (self.id, self.interval.string, self.boundary)

def copy(self,newEnvironment):
"""Only copies the interval and the boundaries, does not copy inlinks
outlinks and anything else. Creates a new Point with unique id."""
return Point(newEnvironment,self.interval,self.boundary)
return Point(newEnvironment, self.interval, self.boundary)

def printVerbosely(self):
print("\nPoint(%s)" % (self.id))
print(" %s - %s" % (self.interval,self.boundary))
print(" %s - %s" % (self.interval, self.boundary))
print(" inLinks:")
for link in self.inLinks: print(" ", link)
print(" outLinks:")
Expand All @@ -151,7 +150,7 @@ def printVerbosely(self):

class AbstractLink(TemporalObject):

def __init__(self,begin,relation,end):
def __init__(self, begin, relation, end):
"""Create a new link. Perhaps add an existency check and
return None if new link already exists."""
self.id = self.newID()
Expand Down Expand Up @@ -236,12 +235,12 @@ def isTrivial(self):
def asPrettyString(self):
return "[%s %s %s]" % (self.begin.string, self.relation, self.end.string)

def __cmp__(self,other):
comparison1 = cmp(self.begin.string,other.begin.string)
if comparison1: return comparison1
comparison2 = cmp(self.end.string,other.end.string)
if comparison2: return comparison2
return cmp(self.relation,other.relation)
# def XXX__cmp__(self, other):
# comparison1 = cmp(self.begin.string, other.begin.string)
# if comparison1: return comparison1
# comparison2 = cmp(self.end.string, other.end.string)
# if comparison2: return comparison2
# return cmp(self.relation, other.relation)

def __str__(self):
return "PLink(%s) [%s.%s %s %s.%s]" % \
Expand Down
41 changes: 17 additions & 24 deletions components/merging/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def process(self):
add resulting links to the TarsqiDocument."""
cp = ConstraintPropagator(self.tarsqidoc)
tlinks = self.tarsqidoc.tags.find_tags(LIBRARY.timeml.TLINK)
# use a primitive sort to order the links on how good they are
tlinks = sorted(tlinks, compare_links)
# order the links on how good they are
tlinks = sorted(tlinks, key=sort_on_confidence, reverse=True)
cp.queue_constraints(self.tarsqidoc.tags.find_tags(LIBRARY.timeml.TLINK))
cp.propagate_constraints()
cp.reduce_graph()
Expand Down Expand Up @@ -87,28 +87,21 @@ def _add_constraint_to_tarsqidoc(self, edge):
self.tarsqidoc.tags.add_tag(TLINK, -1, -1, attrs)


def compare_links(link1, link2):
"""Compare the two links and decide which one of them is more likely to be
correct. Rather primitive for now. We consider S2T links the best, then links
derived by Blinker, then links derived by the classifier. Classifier links
themselves are ordered using their classifier-assigned confidence scores."""
o1, o2 = link1.attrs[ORIGIN], link2.attrs[ORIGIN]
if o1.startswith('S2T'):
return 0 if o2.startswith('S2T') else -1
elif o1.startswith('BLINKER'):
if o2.startswith('S2T'):
return 1
elif o2.startswith('BLINKER'):
return 0
elif o2.startswith('CLASSIFIER'):
return -1
elif o1.startswith('CLASSIFIER'):
if o2.startswith('CLASSIFIER'):
o1_confidence = float(o1[11:])
o2_confidence = float(o2[11:])
return cmp(o2_confidence, o1_confidence)
else:
return 1
def sort_on_confidence(link):
"""Sort key for determining how good we think a link is. Rather primitive for
now. We consider S2T links the best, then links derived by Blinker, then
links derived by the classifier. Classifier links themselves are ordered
using their classifier-assigned confidence scores."""
origin = link.attrs[ORIGIN]
if origin.startswith(S2T):
return 3.0
elif origin.startswith(BLINKER):
return 2.0
elif origin.startswith(CLASSIFIER):
confidence = float(origin[11:])
return 1.0 + confidence
else:
return 0


def tlink_arg1_attr(identifier):
Expand Down
31 changes: 27 additions & 4 deletions docmodel/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,19 +568,42 @@ def __str__(self):
return "<Tag %s %s:%s {%s }>" % \
(self.name, self.begin, self.end, attrs)

def __cmp__(self, other):
# Tag comparisons are based on offsets alone

def __eq__(self, other):
return self.begin == other.begin and self.end == other.end

def __ne__(self, other):
return self.begin != other.begin or self.end != other.end

def __lt__(self, other):
return self._compare(other) < 0

def __le__(self, other):
return self._compare(other) <= 0

def __gt__(self, other):
return self._compare(other) > 0

def __ge__(self, other):
return self._compare(other) >= 0

def _compare(self, other):
"""Order two Tags based on their begin offset and end offsets. Tags with
an earlier begin will be ranked before tags with a later begin, with
equal begins the tag with the higher end will be ranked first. Tags with
no begin (that is, it is set to -1) will be ordered at the end. The
order of two tags with the same begin and end is undefined."""
# TODO: this is a bit of a cluge and should be revisited later
def comp(x, y):
return (x > y) - (x < y)
if self.begin == -1:
return 1
if other.begin == -1:
return -1
begin_cmp = cmp(self.begin, other.begin)
end_cmp = cmp(other.end, self.end)
return end_cmp if begin_cmp == 0 else begin_cmp
begin_comp = comp(self.begin, other.begin)
end_comp = comp(other.end, self.end)
return end_comp if begin_comp == 0 else begin_comp

@staticmethod
def new_attr(attr, attrs):
Expand Down
70 changes: 64 additions & 6 deletions docs/notes/python3.md
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ Changed one occurence of the next() method.

[https://portingguide.readthedocs.io/en/latest/builtins.html](https://portingguide.readthedocs.io/en/latest/builtins.html)

Ran all the fixers mentioned in the link above.
Ran all the fixers mentioned in the link above, all changes are in commit [a8bc653b](https://github.com/tarsqi/ttk/commit/a8bc653bc4f6f5a08d060053095e5aabe11d12fa).

Some comments:

Expand All @@ -627,25 +627,83 @@ Some comments:



### 3.10. Comparing and sorting

[https://portingguide.readthedocs.io/en/latest/comparisons.html](https://portingguide.readthedocs.io/en/latest/comparisons.html)
[http://python3porting.com/preparing.html](http://python3porting.com/preparing.html)

All manual changes, searching for uses of `cmp` and `__cmp__`.

&para; `components/classifier/vectors.py`

Sorting was done for printing the vectors and used a couple of helper functions, but it looks like a simple sort on the keys worked the same so I removed the code that did the comparison.

&para; `components/common_modules/constituent.py`

This is more complicated because the `Consituent.__cmp__` was set up to allow the FSA module to compare objects of different types:

```python
def __cmp__(self, other):
# NOTE: in some cases the matchLabel method in FSA.py checks for
# equality of a constituent and a string using == in which case this
# method is invoked, which would throw an error without the first two
# lines
if isinstance(other, type('')):
return 1
return cmp(self.begin, other.begin)
```
Now it looks like that comment was nonsense. In fact, there is no `matchLabel` method, but there is a `labelMatches` method, and the above function never appears to be called from the latter. Also, the above method is called from *_distributeNode_V* in the *features* module or from *_get_tag_vectors_for_sentence* in the *vectors* module, so no FSA origin here.

Anyway, replaced `__cmp__` with the rich comparison operators.

Then somehow an error with the FSA did show up when using the equals operator. Turns out the label can be 'ALL' and then the comparison chokes. Edited labelMatches so that if the label is ALL the match always succeeds. This may be wrong, but no tests broke.

&para; `components/merging/sputlink/rules/objects.py`

Use of comparison for Point Links does not appear to be used in runtime code, so commented it out for now. TODO: when running *generateRules* comparisons may be needed.

&para; `components/merging/wrapper.py`

Somewhat tricky since the comparison function parses the origin field of the link and looks at the component name and the associated confidence score if the component is the classifier. Cannot just give the origin as the key. Used key function that generates a floeat from the origin attribute.

&para; `docmodel/document.py` and `testing/evaluate.py`

Used rich comparison operators to replace `__cmp__`, but used the contents of the old comparison method in a utility method.

&para; `utilities/convert.py`

Replaced comparison operator with rich comparison operators.

&para; `utilities/make_documentation.py`

Replaced cmp arugment with key argument on sort. Also needed to make a lot of string explicitly unidoce strings.

&para; `utilities/wordnet.py`

Could have dealt with uses of `cmp` by using `comp` and define the latter as before and with uses of `__cmp__` by doing the `_compare` thing as with some of the others above.

In fact, I did nothing, just commented out methods and functions that used `cmp` or `__cmp__`. This module is not used in the runtime version and will be retired.



## 4. Remaining thingies

List of steps still remaining.

- comparing and sorting, including rich comparison operators
- [https://portingguide.readthedocs.io/en/latest/comparisons.html](https://portingguide.readthedocs.io/en/latest/comparisons.html)
- [http://python3porting.com/preparing.html](http://python3porting.com/preparing.html)
- Other core object changes
- [https://portingguide.readthedocs.io/en/latest/core-obj-misc.html](https://portingguide.readthedocs.io/en/latest/core-obj-misc.html)
- [https://portingguide.readthedocs.io/en/latest/core-obj-misc.html](https://portingguide.readthedocs.io/en/latest/core-obj-misc.html)
- Other changes
- [https://portingguide.readthedocs.io/en/latest/etc.html](https://portingguide.readthedocs.io/en/latest/etc.html)

After that we can run `python-modernize` ,`pylint --py3k` and `python -3` when running tarsqi.py and the testing and regression scripts.

Maybe check [http://python3porting.com/stdlib.html#removed-modules](http://python3porting.com/stdlib.html#removed-modules).

Followed by `2to3`.
Run make_documentation.py.

Version and save last Python2 compliant version.

Run `2to3`.

Remove from future imports and calls to six library.

Expand Down
Loading

0 comments on commit 8f1bc1d

Please sign in to comment.