diff --git a/components/classifier/wrapper.py b/components/classifier/wrapper.py index 9a28aa3..b183d41 100644 --- a/components/classifier/wrapper.py +++ b/components/classifier/wrapper.py @@ -11,6 +11,7 @@ from library.main import LIBRARY from utilities import logger, mallet from components.classifier import vectors +from io import open TTK_ROOT = os.environ['TTK_ROOT'] diff --git a/components/common_modules/chunks.py b/components/common_modules/chunks.py index a2f6b6a..ea4b552 100644 --- a/components/common_modules/chunks.py +++ b/components/common_modules/chunks.py @@ -29,6 +29,7 @@ from components.evita.settings import EVITA_NOM_WNPRIMSENSE_ONLY from utilities import logger +from io import open # Get the Bayesian event recognizer diff --git a/components/evita/wordnet.py b/components/evita/wordnet.py index faeb6de..a5cbe0b 100644 --- a/components/evita/wordnet.py +++ b/components/evita/wordnet.py @@ -8,6 +8,7 @@ from library import forms import utilities.binsearch as binsearch import utilities.logger as logger +from io import open # Open dbm's with information about nominal events. If that does not work, open diff --git a/components/evita/wrapper.py b/components/evita/wrapper.py index 4acb52a..d7cf3d1 100644 --- a/components/evita/wrapper.py +++ b/components/evita/wrapper.py @@ -8,6 +8,7 @@ from library.tarsqi_constants import EVITA from library.main import LIBRARY from components.evita.main import Evita +from io import open # Set this to True if you want to do a simplistic evaluation of how many of the diff --git a/components/merging/sputlink/graph.py b/components/merging/sputlink/graph.py index c5f5b26..d008314 100644 --- a/components/merging/sputlink/graph.py +++ b/components/merging/sputlink/graph.py @@ -19,6 +19,7 @@ from .mappings import abbreviate_convex_relation from utilities import logger from library.main import LIBRARY +from io import open DEBUG = True DEBUG = False diff --git a/components/merging/sputlink/main.py b/components/merging/sputlink/main.py index 3ba6a03..32a55aa 100644 --- a/components/merging/sputlink/main.py +++ b/components/merging/sputlink/main.py @@ -8,6 +8,7 @@ from .utils import CompositionTable from .utils import html_graph_prefix from library.main import LIBRARY +from io import open DEBUG = False DEBUG = True diff --git a/components/merging/sputlink/rules/generateRules.py b/components/merging/sputlink/rules/generateRules.py index 513910f..1b99e21 100644 --- a/components/merging/sputlink/rules/generateRules.py +++ b/components/merging/sputlink/rules/generateRules.py @@ -14,6 +14,7 @@ Node, EventNode, TimexNode, Point, Link, PLink from closure import Axiom, Closure +from io import open settings = {} settings['debug'] = 0 diff --git a/components/merging/sputlink/utils.py b/components/merging/sputlink/utils.py index b8cfdba..454b2f4 100644 --- a/components/merging/sputlink/utils.py +++ b/components/merging/sputlink/utils.py @@ -1,5 +1,7 @@ +from __future__ import absolute_import +from io import open def intersect_relations(rels1, rels2): """Returns the intersection of two relation sets. 
Returns None if both of the two sets are None.""" diff --git a/components/simpletime/main.py b/components/simpletime/main.py index bd81091..2615a70 100644 --- a/components/simpletime/main.py +++ b/components/simpletime/main.py @@ -1,5 +1,6 @@ from __future__ import absolute_import import os, sys +from io import open def elements_file(): diff --git a/deprecated/demo/display.py b/deprecated/demo/display.py index d5a7388..bcd17c9 100644 --- a/deprecated/demo/display.py +++ b/deprecated/demo/display.py @@ -27,6 +27,7 @@ import os from docmodel.xml_parser import Parser, create_dct_element from library.tarsqi_constants import BLINKER, SLINKET, S2T, CLASSIFIER +from io import open TTK_ROOT = os.environ['TTK_ROOT'] diff --git a/deprecated/get_lexes.py b/deprecated/get_lexes.py index 9d3e8dd..58f89f5 100644 --- a/deprecated/get_lexes.py +++ b/deprecated/get_lexes.py @@ -19,6 +19,7 @@ import sys, os from xml.dom.minidom import parse +from io import open def getText(nodelist): diff --git a/deprecated/get_tags.py b/deprecated/get_tags.py index 01a7af3..0b1ef10 100644 --- a/deprecated/get_tags.py +++ b/deprecated/get_tags.py @@ -12,6 +12,7 @@ import os, glob from xml.dom import minidom +from io import open INDIR = "data/out/Timebank" diff --git a/deprecated/gui.py b/deprecated/gui.py index cef8f09..c383df3 100644 --- a/deprecated/gui.py +++ b/deprecated/gui.py @@ -20,6 +20,7 @@ from docmodel.xml_parser import Parser from library.tarsqi_constants import PREPROCESSOR, GUTIME, EVITA, SLINKET, S2T from library.tarsqi_constants import CLASSIFIER, BLINKER, CLASSIFIER, LINK_MERGER +from io import open TTK_ROOT = os.environ['TTK_ROOT'] diff --git a/deprecated/sputlink/rule_creation/generateRules.py b/deprecated/sputlink/rule_creation/generateRules.py index aae620e..8699356 100755 --- a/deprecated/sputlink/rule_creation/generateRules.py +++ b/deprecated/sputlink/rule_creation/generateRules.py @@ -13,6 +13,7 @@ Node, EventNode, TimexNode, Point, Link, PLink from closure import Axiom, Closure +from io import open settings = {} settings['debug'] = 0 diff --git a/deprecated/xml_parser.py b/deprecated/xml_parser.py index 8890a94..0e7d9ac 100644 --- a/deprecated/xml_parser.py +++ b/deprecated/xml_parser.py @@ -18,6 +18,7 @@ from types import UnicodeType, IntType, StringType, NoneType import xml.parsers.expat from xml.sax.saxutils import escape, quoteattr +from io import open # variable used for assigning unique IDs to XmlDocElements diff --git a/docmodel/document.py b/docmodel/document.py index 9780beb..681682d 100644 --- a/docmodel/document.py +++ b/docmodel/document.py @@ -12,6 +12,7 @@ from library.main import LIBRARY from utilities import logger +from io import open TIMEX = LIBRARY.timeml.TIMEX diff --git a/docmodel/main.py b/docmodel/main.py index 9d1149a..5ac0c76 100644 --- a/docmodel/main.py +++ b/docmodel/main.py @@ -21,6 +21,7 @@ from docmodel.docstructure_parser import DocumentStructureParser from utilities import logger +from io import open PARSERS = {'ttk': (SourceParserTTK, MetadataParserTTK), diff --git a/docmodel/source_parser.py b/docmodel/source_parser.py index cf0341a..a5b769a 100644 --- a/docmodel/source_parser.py +++ b/docmodel/source_parser.py @@ -43,6 +43,7 @@ from docmodel.document import SourceDoc, ProcessingStep from utilities.lif import Container, LIF +from io import open class SourceParser(object): @@ -214,7 +215,11 @@ def parse_file(self, filename, tarsqidoc): text and tags in SourceDoc.""" self.sourcedoc = SourceDoc(filename) # TODO: should this be codecs.open() for non-ascii? 
-        self.parser.ParseFile(open(filename))
+        # self.parser.ParseFile(open(filename))
+        # NOTE: actually, the above line needed to be replaced with the following
+        # while preparing to port code to Python3.
+        content = open(filename).read()
+        self.parser.Parse(content)
         self.sourcedoc.finish()
         tarsqidoc.sourcedoc = self.sourcedoc
diff --git a/docs/notes/python3.md b/docs/notes/python3.md
index 8b87de8..87178d6 100644
--- a/docs/notes/python3.md
+++ b/docs/notes/python3.md
@@ -5,7 +5,7 @@ December 2020.
 This document has notes on the effort to get a Python 3 version of TTK. Originally, the goal was to get a few steps along the way without any impact to TTK users, that is, no extra installations, not even something as simple as `pip install future` or `pip install builtins`. The goal was also to eventually change the code so that it supports both Python 2.7 and Python 3.5 and up.
-This was all dropped. The goal is to get a Python3 version in TTK version 3.0.0. Period. Probably version 3.6. This might require extra installation, fine. And version 2.7 will not be supported anymore from now on except perhaps for requested bug fixes on version 2.2.0. Having said that, the first steps of the process are all steps where the resulting code will still run on Python 2.7
+This was all dropped. The goal is to get a Python3 version in TTK version 3.0.0. Period. Probably version 3.6. This might require extra installation, fine. And version 2.7 will not be supported anymore from now on except perhaps for requested bug fixes on version 2.2.0. Having said that, the first steps of the process are all steps where the resulting code will still run on Python 2.7.
@@ -19,7 +19,9 @@ For now:
 - After each step, review the changes, do not yet make any manual edits
 - After one or more steps, run the tests and if they pass put all changes in the git staging area.
-  - Try to isolate automatic amd manual changes in separate commits. The only exception is that this file may be updated in a commit alongside automatic changes.
+  - Try to isolate automatic and manual changes in separate commits. The only exceptions are:
+    - A porting issue where an automatic step is followed by well-defined manual steps; these sometimes make more sense as a single commit.
+    - This file may be updated in a commit alongside automatic changes, typically when the changes are relevant to the porting issue of that commit.
 - Also looking at [http://python3porting.com/preparing.html](http://python3porting.com/preparing.html) to do those preparatory changes that allow you to still run under Python2 (division, strings, new classes, etcetera). There obviously is major overlap between this one and the previous one.
 - Commands that give information:
@@ -35,11 +37,11 @@ In the following, each section corresponds to one or more commits on the `79-pyt
-### 2. Initial syntax changes
+### 2. Initial syntax changes
-These changes are based on https://portingguide.readthedocs.io/en/latest/syntax.html.
+These changes are based on https://portingguide.readthedocs.io/en/latest/syntax.html. All changes made in this section are in commit [94b2bce5](https://github.com/tarsqi/ttk/commit/94b2bce5e5b68e688d4d385bcb2b022b4f1e7093).
-Syntax Change 1: Get rid of tabs
+#### 2.1. Getting rid of tabs
 ```
 find . -name '*.py' -type f -exec bash -c 'T=$(mktemp); expand -i -t 8 "$0" > "$T" && mv "$T" "$0"' {} \;
 ```
 $ mv wordnet.new.py wordnet.py
 No problems with that.
-Syntax Change 2: Tuple Unpacking in Parameter Lists
+#### 2.2. Tuple Unpacking in Parameter Lists
 ```
 $ python-modernize -wnf lib2to3.fixes.fix_tuple_params .
@@ -99,29 +101,27 @@ it is used for finding backward slinks and alinks.
 However, it turns out that this error also happens with the code before this change, so I will let it go.
-Syntax Change 3: Backticks
+#### 2.3. Backticks and other changes
+
+The following fixes backticks and there were no problems with it:
 ```
 $ python-modernize -wnf lib2to3.fixes.fix_repr .
 ```
-No problems.
-
-Syntax Changes: Others
+There was no need to remove the inequality operator `<>` and there were no assignments to True or False. Other syntax changes are done in later steps.
-There was no need to remove the inequality operator `<>` and there were no assignments to True or False. Other syntax changes are done in later steps. Want to find out why.
-The changes in this section are in commit [94b2bce5](https://github.com/tarsqi/ttk/commit/94b2bce5e5b68e688d4d385bcb2b022b4f1e7093).
-### 2. More preparatory changes
+### 3. More preparatory changes
 Based on [http://python3porting.com/preparing.html](http://python3porting.com/preparing.html). These are similar to the above in that they allow the code to still run on Python 2.7.
-Division of integers
+#### 3.1. Division of integers
 Using // when we really want to have integers as the result, using / in other cases. Sometimes using `from __future__ import division` and removing explicit conversions into floats. See commit [047d9c28](https://github.com/tarsqi/ttk/commit/047d9c2850b5589e05641e182f69704f8787bb09).
-Using new style classes
+#### 3.2. Using new style classes
 Doing this all manually, but used the following code to find the classes.
@@ -135,13 +135,13 @@ def check_classes(fname):
         if line.startswith('class ') and line.endswith(':'):
             if is_old_class(line):
                 print(fname, '>>>', line)
-            if has_muliple_parents(line):
+            if has_multiple_parents(line):
                 print(fname, '===', line)
 
 def is_old_class(line):
     return '(' not in line or line.endswith('():')
 
-def has_muliple_parents(line):
+def has_multiple_parents(line):
     return ',' in line
 
 if __name__ == '__main__':
@@ -188,7 +188,7 @@ After this it all worked, but see the TODO comment in `create_dicts.py` which el
 See commit [0ccd82d9](https://github.com/tarsqi/ttk/commit/0ccd82d9f11e72c75b7bcbb5e71044b87a818385).
-Absolute imports
+#### 3.3. Absolute imports
 Changed made here are based on [https://portingguide.readthedocs.io/en/latest/imports.html](https://portingguide.readthedocs.io/en/latest/imports.html).
@@ -208,3 +208,143 @@ After this I made some changes to streamline some of the utilities and testing s
 $ python -m testing.run_tests
 ```
+See commit [465f9dfc](https://github.com/tarsqi/ttk/commit/465f9dfcdb6ef4b5051d6d5732f1ef116c4486f6).
+
+#### 3.4. String handling
+
+Changes made here are based on [https://portingguide.readthedocs.io/en/latest/strings.html](https://portingguide.readthedocs.io/en/latest/strings.html).
+
+For separating binary data and strings I glanced over all the lines with quoted strings in them; they all appear to be text strings. Did this for cases like `"hello"` with the find.pl script and created find.py to do the same for use of `'string'`, `u"string"`, `u'string'`, byte strings with the b prefix and the raw string.
+
+String operations in Python 3 cannot mix different types, look into this.
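+A quick illustration of the kind of thing that breaks (a made-up snippet, not code from TTK):
+
+```python
+data = b"abc"   # made-up example values
+text = u"def"
+try:
+    print(data + text)    # Python 2: prints abcdef after an implicit ASCII decode
+except TypeError:
+    print("Python 3 refuses to mix bytes and text")
+```
+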
+Using -3 on the tarsqi.py script and run_tests.py gives 2 warnings on Blinker code:
+
+```
+compare.py:150: DeprecationWarning: comparing unequal types not supported in 3.x
+  if (year1_int < year2_int):
+compare.py:152: DeprecationWarning: comparing unequal types not supported in 3.x
+  elif (year1_int > year2_int):
+```
+
+However, looking at the code it seems that both variables must be of type *int*.
+
+For type checking (use of basestring) I ran
+
+```
+$ python-modernize -wnf libmodernize.fixes.fix_basestring .
+```
+
+which didn't make any changes.
+
+##### 3.4.1. File I/O
+
+```
+$ python-modernize -wnf libmodernize.fixes.fix_open .
+```
+
+This caused many changes, most of which just add
+
+```python
+from io import open
+```
+
+We do get some problems; here's the first:
+
+```
+python tarsqi.py data/in/simple-xml/tiny.xml out.xml
+Traceback (most recent call last):
+  File "tarsqi.py", line 111, in <module>
+    from components import COMPONENTS, valid_components
+  File ".../ttk/git/ttk/components/__init__.py", line 6, in <module>
+    from preprocessing.wrapper import PreprocessorWrapper
+  File ".../ttk/git/ttk/components/preprocessing/wrapper.py", line 28, in <module>
+    from components.preprocessing.chunker import chunk_sentences
+  File ".../ttk/git/ttk/components/preprocessing/chunker.py", line 30, in <module>
+    from components.common_modules.tree import create_tarsqi_tree
+  File ".../ttk/git/ttk/components/common_modules/tree.py", line 9, in <module>
+    from components.common_modules.chunks import NounChunk, VerbChunk
+  File ".../ttk/git/ttk/components/common_modules/chunks.py", line 24, in <module>
+    from components.evita import bayes
+  File ".../ttk/git/ttk/components/evita/bayes.py", line 16, in <module>
+    DictSemcorContext = open_pickle_file(forms.DictSemcorContextPickleFilename)
+  File ".../ttk/git/ttk/utilities/file.py", line 45, in open_pickle_file
+    return pickle.load(fh)
+  File ".../miniconda2/lib/python2.7/pickle.py", line 1384, in load
+    return Unpickler(file).load()
+  File ".../miniconda2/lib/python2.7/pickle.py", line 864, in load
+    dispatch[key](self)
+  File ".../miniconda2/lib/python2.7/pickle.py", line 986, in load_unicode
+    self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
+TypeError: decoding Unicode is not supported
+```
+
+The problem here is in the code that loads a pickle file (`utilities/file.py`), which now reads a text string where it should read a binary string. Change `open_pickle_file()` into
+
+```python
+def open_pickle_file(fname):
+    """Return the contents of a pickle file."""
+    with open(fname, 'rb') as fh:
+        return pickle.load(fh)
+```
+
+The next problem is in the logger:
+
+```
+  File ".../ttk/git/ttk/utilities/logger.py", line 76, in __init__
+    self.html_file.write("\n")
+TypeError: write() argument 1 must be unicode, not str
+```
+
+While this would work fine in Python 3, in Python 2 the string has to be of type unicode, and the string literal is not of that type, so we need to add a `u` in front of it.
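+
+A minimal sketch of the pattern (not the actual logger code, just the shape of the fix):
+
+```python
+from io import open
+
+# Sketch only: 'example.html' stands in for the real log file. With io.open
+# the file is in text mode, so on Python 2 only unicode can be written to it;
+# the u'' literal keeps the write working on both 2.7 and 3.x.
+with open('example.html', 'w') as fh:
+    fh.write(u"logger initialized\n")
+```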
+
+And then:
+
+```
+  File ".../ttk/git/ttk/docmodel/source_parser.py", line 218, in parse_file
+    self.parser.ParseFile(open(filename))
+TypeError: read() did not return a string object (type=unicode)
+```
+
+This could be fixed in `docmodel.source_parser.SourceParserXml.parse_file`, replacing
+
+```python
+self.parser.ParseFile(open(filename))
+```
+
+with
+
+```python
+content = open(filename).read()
+self.parser.Parse(content)
+```
+
+Finally, the last problem:
+
+```
+  File ".../ttk/git/ttk/library/blinker/blinker_rule_loader.py", line 113, in read_syntactic_rules
+    val = str.split(val[1:-1], '|')
+TypeError: descriptor 'split' requires a 'str' object but received a 'unicode'
+```
+
+Blinker contained an ancient line that used `str.split()`; replaced it with
+
+```python
+val = val[1:-1].split('|')
+```
+
+And finally the tarsqi script works, and so do the basic test script and the evita regression test... but writing the regression report does not, which takes us to the next section.
+
+##### 3.4.2. Unresolved string issue
+
+Generating a report for the regression tests croaks the same way the logger did:
+
+```
+$ python -m testing.regression --report
+...
+  File ".../ttk/git/ttk/testing/regression.py", line 181, in write_index
+    self.index_fh.write("\n\n")
+TypeError: write() argument 1 must be unicode, not str
+```
+
+This will probably show up all over the place. The annoying part is that the code would work fine on Python 3, but to make it run on Python 2 we need to track down all those string literals, which I really do not want to do. So for now I just fix it for the code needed for porting to Python 3, including showing the results of the regression tests (and whatever else comes up later).
+
+
diff --git a/library/blinker/blinker_rule_loader.py b/library/blinker/blinker_rule_loader.py
index 3e66883..74264b9 100644
--- a/library/blinker/blinker_rule_loader.py
+++ b/library/blinker/blinker_rule_loader.py
@@ -1,6 +1,7 @@
 from __future__ import absolute_import
 import os
 import re
+from io import open
 
 TTK_ROOT = os.environ['TTK_ROOT']
 
@@ -109,7 +110,7 @@ def read_syntactic_rules(rule_file):
             if val[0] != '(':
                 val = [val]
             else:
-                val = str.split(val[1:-1], '|')
+                val = val[1:-1].split('|')
             current_rule.set_attribute(att, val)
             continue
diff --git a/library/classifier/create_vectors.py b/library/classifier/create_vectors.py
index a3fbec2..4a6bc7b 100644
--- a/library/classifier/create_vectors.py
+++ b/library/classifier/create_vectors.py
@@ -106,6 +106,7 @@
 from components.classifier.vectors import collect_tarsqidoc_vectors
 from library.main import LIBRARY
+from io import open
 
 GOLD_DIR = os.path.join('data', 'gold')
diff --git a/library/evita/build_event_nominals1.py b/library/evita/build_event_nominals1.py
index e0f5e12..55f1e30 100644
--- a/library/evita/build_event_nominals1.py
+++ b/library/evita/build_event_nominals1.py
@@ -23,6 +23,7 @@
 import forms
 import anydbm
 from wntools import *
+from io import open
 
 DEBUG = False
diff --git a/library/evita/build_event_nominals2.py b/library/evita/build_event_nominals2.py
index ef3c49a..0e31979 100644
--- a/library/evita/build_event_nominals2.py
+++ b/library/evita/build_event_nominals2.py
@@ -24,6 +24,7 @@
 import os
 import sys
 import anydbm
+from io import open
 
 # Open text versions of DBM files in Dicts directory.
file1 = open(os.path.join('dictionaries', 'wnPrimSenseIsEvent.txt'), 'r') diff --git a/library/evita/compile_patterns.py b/library/evita/compile_patterns.py index dd5449e..6f4ca6b 100644 --- a/library/evita/compile_patterns.py +++ b/library/evita/compile_patterns.py @@ -23,6 +23,7 @@ from __future__ import absolute_import import os, sys, cPickle +from io import open sys.path.append('../..') diff --git a/library/evita/nominal_trainer.py b/library/evita/nominal_trainer.py index 0d601b7..b8c18a3 100644 --- a/library/evita/nominal_trainer.py +++ b/library/evita/nominal_trainer.py @@ -25,6 +25,7 @@ import evitaTimemlParser import forms +from io import open contentPos = re.compile(r'(NN$|NNS|VB|JJ)') featureFuncs = [lambda x: x.pos, lambda x: definiteness(x)] diff --git a/library/main.py b/library/main.py index 7955090..8e87b32 100644 --- a/library/main.py +++ b/library/main.py @@ -12,6 +12,7 @@ from __future__ import absolute_import import os +from io import open TTK_ROOT = os.environ['TTK_ROOT'] diff --git a/library/patterns.py b/library/patterns.py index ea0f37e..7d35f8e 100644 --- a/library/patterns.py +++ b/library/patterns.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import os import cPickle +from io import open TTK_ROOT = os.environ['TTK_ROOT'] DIR_PATTERNS = os.path.join(TTK_ROOT, 'library', 'evita', 'patterns') diff --git a/library/s2t/s2t_rule_loader.py b/library/s2t/s2t_rule_loader.py index e325a1b..c81b912 100644 --- a/library/s2t/s2t_rule_loader.py +++ b/library/s2t/s2t_rule_loader.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import os import re +from io import open TTK_ROOT = os.environ['TTK_ROOT'] diff --git a/library/slinket/create_dicts.py b/library/slinket/create_dicts.py index 620b385..30a603c 100644 --- a/library/slinket/create_dicts.py +++ b/library/slinket/create_dicts.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import os, sys, cPickle +from io import open sys.path.append('../..') diff --git a/library/slinket/main.py b/library/slinket/main.py index ed4d516..3273096 100644 --- a/library/slinket/main.py +++ b/library/slinket/main.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import os import cPickle +from io import open TTK_ROOT = os.environ['TTK_ROOT'] DIR_DICTS = os.path.join(TTK_ROOT, 'library', 'slinket', 'dictionaries') diff --git a/testing/regression.py b/testing/regression.py index dc7d318..d912597 100644 --- a/testing/regression.py +++ b/testing/regression.py @@ -46,6 +46,7 @@ import os, sys, getopt, time, glob import tarsqi +from io import open def load_cases(fname): @@ -177,10 +178,10 @@ def _init_cases(self): def write_index(self): """Create the idex for all results.""" - self.index_fh.write("\n\n") + self.index_fh.write(u"\n\n") for name in self.cases: - self.index_fh.write("%s\n" % (name, name)) - self.index_fh.write("\n\n") + self.index_fh.write(u"%s\n" % (name, name)) + self.index_fh.write(u"\n\n") def write_cases(self): """Create the file with results for all cases.""" @@ -194,7 +195,7 @@ def write_cases(self): self._write_case_report() def _load_cases(self): - self.case_input_file = "%s/cases-%s.tab" % (self.cases_dir, self.case) + self.case_input_file = u"%s/cases-%s.tab" % (self.cases_dir, self.case) self.case_input_fh = open(self.case_input_file) print " reading cases in", self.case_input_file self.case_input = {} @@ -203,7 +204,7 @@ def _load_cases(self): def _load_case_results(self): self.case_results = {} - for results_file in glob.glob("%s/%s/*.tab" % (self.results_dir, self.case)): + for 
results_file in glob.glob(u"%s/%s/*.tab" % (self.results_dir, self.case)): print ' reading results from', results_file timestamp = os.path.splitext(os.path.basename(results_file))[0] self.case_results[timestamp] = {} @@ -217,31 +218,31 @@ def _write_case_report(self): for ts in timestamps: for identifier in self.case_results[ts].keys(): identifiers[identifier] = True - self.case_fh.write("\n\n") - self.case_fh.write("\n\n\n") - self.case_fh.write("\n") - self.case_fh.write("") - self.case_fh.write("
 ") + self.case_fh.write(u"\n\n") + self.case_fh.write(u"\n\n\n") + self.case_fh.write(u"\n") + self.case_fh.write(u"") + self.case_fh.write(u" ") - self.case_fh.write(" ") + self.case_fh.write(u"
 ") for ts in timestamps: - self.case_fh.write(" %s" % ts[2:8]) - self.case_fh.write(" o1") - self.case_fh.write(" o2") - self.case_fh.write(" sentence") + self.case_fh.write(u" %s" % ts[2:8]) + self.case_fh.write(u" o1") + self.case_fh.write(u" o2") + self.case_fh.write(u" sentence") for identifier in sorted(identifiers.keys()): - self.case_fh.write("
%s" % identifier) + self.case_fh.write(u"
%s" % identifier) for ts in timestamps: - self.case_fh.write(" %s" % self.case_results[ts].get(identifier, ' ')) + self.case_fh.write(u" %s" % self.case_results[ts].get(identifier, ' ')) case = self.case_input[identifier] - self.case_fh.write(" %s" % case.o1) - self.case_fh.write(" %s" % case.o2) - self.case_fh.write(" %s%s%s" + self.case_fh.write(u" %s" % case.o1) + self.case_fh.write(u" %s" % case.o2) + self.case_fh.write(u" %s%s%s" % (case.sentence[:case.o1], case.sentence[case.o1:case.o2], case.sentence[case.o2:])) - self.case_fh.write("
") + self.case_fh.write(u"
") def generate_report(): diff --git a/utilities/code-stats.py b/utilities/code-stats.py index 926cf72..e563a9d 100644 --- a/utilities/code-stats.py +++ b/utilities/code-stats.py @@ -42,6 +42,7 @@ from __future__ import absolute_import import os, sys, subprocess +from io import open command = ['git', 'log', '-n', '1', '--date=short', '--pretty=format:"%ad %H"'] result = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0] diff --git a/utilities/convert.py b/utilities/convert.py index d2c357f..7d1b1a3 100644 --- a/utilities/convert.py +++ b/utilities/convert.py @@ -81,6 +81,7 @@ from docmodel.main import create_docstructure_parser from docmodel.document import TarsqiDocument, Tag, ProcessingStep from library.main import TarsqiLibrary +from io import open DEBUG = True DEBUG = False diff --git a/utilities/file.py b/utilities/file.py index aac8a65..ee46984 100644 --- a/utilities/file.py +++ b/utilities/file.py @@ -3,6 +3,7 @@ from __future__ import absolute_import import os import pickle +from io import open def file_contents(filename): @@ -40,7 +41,7 @@ def write_text_to_file(text, filename): def open_pickle_file(fname): """Return the contents of a pickle file.""" - with open(fname, 'r') as fh: + with open(fname, 'rb') as fh: return pickle.load(fh) diff --git a/utilities/get_issues.py b/utilities/get_issues.py index 6c40113..fe7be01 100644 --- a/utilities/get_issues.py +++ b/utilities/get_issues.py @@ -27,6 +27,7 @@ from __future__ import absolute_import import os, sys, json, pprint, re from HTMLParser import HTMLParser +from io import open DEBUG = False diff --git a/utilities/logger.py b/utilities/logger.py index df14f8b..cec77a3 100644 --- a/utilities/logger.py +++ b/utilities/logger.py @@ -55,6 +55,7 @@ import os import sys import inspect +from io import open logger = None @@ -72,11 +73,11 @@ def __init__(self, filename, level=2): self.errors = 0 self.warnings = 0 self.html_file = open(filename + '.html', 'w') - self.html_file.write("\n") - self.html_file.write("\n\n\n") - self.html_file.write("\n\n\n\n") + self.html_file.write(u"\n") + self.html_file.write(u"\n\n\n") + self.html_file.write(u"\n\n
\n\n") def initialize_logger(filename, level=2): @@ -163,10 +164,10 @@ def _log(message_type, log_string): message_type = '' + message_type + '' # if message_type == 'DEBUG': # message_type = '' + message_type + '' - logger.html_file.write("\n\n") - logger.html_file.write(" \n") + logger.html_file.write(u"
%s\n" % (message_type)) - logger.html_file.write(" %s\n" % (trace)) - logger.html_file.write(" %s\n" % (log_string)) + logger.html_file.write(u"\n
%s\n" % (message_type)) + logger.html_file.write(u" %s\n" % (trace)) + logger.html_file.write(u" %s\n" % (log_string)) logger.html_file.flush() diff --git a/utilities/make_documentation.py b/utilities/make_documentation.py index 24d1e7a..a643ac7 100644 --- a/utilities/make_documentation.py +++ b/utilities/make_documentation.py @@ -38,6 +38,7 @@ from types import ClassType, FunctionType, MethodType, TypeType from .modules import MODULES +from io import open # Set this to True if you want the sources for all functions written to files # that are linked to from the module page, this slows down the code quite a bit diff --git a/utilities/stemmer.py b/utilities/stemmer.py index 9cecc61..ae41bd3 100644 --- a/utilities/stemmer.py +++ b/utilities/stemmer.py @@ -11,6 +11,7 @@ from __future__ import absolute_import from library.forms import STEM_EXCEPTIONS_FILE from .binsearch import binarySearchFile +from io import open class Stemmer(object): diff --git a/utilities/wntools.py b/utilities/wntools.py index 02a96ef..b6c6115 100644 --- a/utilities/wntools.py +++ b/utilities/wntools.py @@ -30,6 +30,7 @@ """ from __future__ import absolute_import +from io import open __author__ = "Oliver Steele " __version__ = "2.0" diff --git a/utilities/wordnet.py b/utilities/wordnet.py index 97af01d..f36d922 100644 --- a/utilities/wordnet.py +++ b/utilities/wordnet.py @@ -37,6 +37,7 @@ """ from __future__ import absolute_import +from io import open __author__ = "Oliver Steele " __version__ = "1.4"