-
Notifications
You must be signed in to change notification settings - Fork 2
/
utlglinearise.py
77 lines (59 loc) · 2.42 KB
/
utlglinearise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from matxin_lineariser.utlgrammars.lineariser import Lineariser
from matxin_lineariser.utlgrammars.printing import Printing
from matxin_lineariser.utlgrammars.sentence import Sentence
from argparse import ArgumentParser
from pprint import PrettyPrinter
from sys import stdin, stdout, stderr
from xml.etree import ElementTree
# Command-line interface.  Parsing runs when the module is loaded, so
# ``arguments`` and ``lineariser`` are module-level globals that ``main``
# and its helpers read directly.
argument_parser = ArgumentParser()

# -m/--matxin: makes main() parse stdin with ElementTree and write the
# annotated tree back to stdout.
argument_parser.add_argument(
    '-m',
    '--matxin',
    dest='matxin',
    action='store_true',
)

# -1/--1-best: stores 1 in ``n``; without the flag ``n`` stays 0.  ``n`` is
# forwarded to Sentence.linearise and selects the output shape in main().
argument_parser.add_argument(
    '-1',
    '--1-best',
    dest='n',
    action='store_const',
    const=1,
    default=0,
)

# -v/--verbose: dump each sentence and its linearisations to stderr.
argument_parser.add_argument(
    '-v',
    '--verbose',
    dest='verbose',
    action='store_true',
)

# -s/--shuffle: forwarded unchanged to Sentence.linearise.
argument_parser.add_argument(
    '-s',
    '--shuffle',
    dest='shuffle',
    action='store_true',
)

# --projectivise: call Sentence.projectivise() before linearising.
argument_parser.add_argument('--projectivise', action='store_true')

# Positional path of the file handed to Lineariser.deserialise in main().
argument_parser.add_argument('xml')

arguments = argument_parser.parse_args()
lineariser = Lineariser()
def print_verbose(sentence):
    """Write a debug dump of *sentence* to standard error.

    Two lines are emitted: the ``str()`` of the sentence itself, then its
    linearisations rendered by ``Printing.print_list`` (with
    ``Printing.print_list`` also used as the per-item printer).  The stream
    is flushed afterwards.
    """
    sentence_line = f'sentence = {sentence!s}'
    rendered_linearisations = Printing.print_list(
        sentence.get_linearisations(), print_item=Printing.print_list)
    linearisations_line = (
        'sentence.get_linearisations() = ' + rendered_linearisations)

    print(sentence_line, file=stderr)
    print(linearisations_line, file=stderr)
    stderr.flush()
def main():
    """Linearise the sentences read from standard input.

    The module-level ``lineariser`` is first deserialised from the file
    named by the positional ``xml`` argument.  Then one of two modes runs:

    * ``--matxin``: stdin is parsed as XML; for every sentence produced by
      ``Sentence.deserialise_matxin`` the nodes of its first linearisation
      get an ``ord`` attribute holding their 1-based position, and the
      whole tree is written to stdout with an XML declaration.
    * otherwise: sentences come from ``Sentence.deserialise(stdin)``; with
      ``-1`` only the first string of each sentence is printed, without it
      the full list of strings is pretty-printed.

    In both modes ``--projectivise`` and ``--verbose`` are honoured per
    sentence.
    """
    with open(arguments.xml) as grammar_file:
        lineariser.deserialise(grammar_file)

    if arguments.matxin:
        tree = ElementTree.parse(stdin)
        for sentence in Sentence.deserialise_matxin(tree.getroot()):
            if arguments.projectivise:
                sentence.projectivise()
            sentence.linearise(lineariser, arguments.n, arguments.shuffle)

            # Only the first linearisation is written back into the tree.
            first_linearisation = sentence.get_linearisations()[0]
            for position, wordline in enumerate(first_linearisation, start=1):
                wordline.node_etree.set('ord', str(position))

            if arguments.verbose:
                print_verbose(sentence)

        tree.write(stdout, encoding='unicode', xml_declaration=True)
        return

    best_only = arguments.n == 1
    if not best_only:
        # The pretty printer is needed only when every string is shown.
        pretty_printer = PrettyPrinter()

    for sentence in Sentence.deserialise(stdin):
        if arguments.projectivise:
            sentence.projectivise()
        sentence.linearise(lineariser, arguments.n, arguments.shuffle)

        if arguments.verbose:
            print_verbose(sentence)

        # Output is flushed after each sentence in both branches.
        if best_only:
            print(sentence.get_strings()[0], flush=True)
        else:
            pretty_printer.pprint(sentence.get_strings())
            stdout.flush()
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()