-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheval.py
115 lines (82 loc) · 2.98 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from argparse import ArgumentParser
from collections import defaultdict
from tree import parse_string
from pcyk import PCYK
import sys
def eval(args):
    """Parse the sentences of a treebank and report labeled precision/recall.

    For every line of ``args.trees`` the gold tree is read with
    ``parse_string``, its terminal labels are parsed with a ``PCYK`` parser
    built from ``args.rules``, ``args.lexicon`` and ``args.beam``, and the
    un-binarized parse is compared with the gold tree.

    Output:
        stdout: the raw parse, then one tab-separated record
            ``lp, lr, parsed tree, gold tree`` per parsed sentence.
        stderr: a message for each sentence without a parse, followed by the
            average LP/LR and the number of evaluated sentences.

    Args:
        args: Namespace providing ``rules``, ``lexicon``, ``beam``,
            ``maxlen`` and ``trees``.

    Note:
        The function name shadows the builtin ``eval``; it is kept because
        callers refer to it by this name.
    """
    parser = PCYK(args.rules, args.lexicon, args.beam)
    sum_lp = 0
    sum_lr = 0
    total = 0

    with open(args.trees) as f:
        for line in f:
            gold = parse_string(line)
            tokens = [node.label for node in gold.terminals()]

            # Only sentences of at most ``maxlen`` tokens are evaluated.
            # NOTE(review): a falsy maxlen (0 / None) skips every sentence;
            # confirm whether it was meant to disable the limit instead.
            if not (args.maxlen and len(tokens) <= args.maxlen):
                continue

            parsed = parser.parse(tokens)
            if not parsed:
                print('No parse found for "{}"'.format(' '.join(tokens)), file=sys.stderr)
                continue

            # presumably parse() returns a (score, tree) pair -- verify
            # against PCYK.parse; only element 1 is used here.
            parsed_tree = parsed[1]
            print(parsed_tree)
            # Flatten the tree in place before comparing it with the gold tree.
            unbinary_tree(parsed_tree)

            correct_constitues = correct_constitue(parsed_tree, gold)
            lp = correct_constitues / len(parsed_tree.nonterminals())
            lr = correct_constitues / len(gold.nonterminals())
            print(lp, lr, parsed_tree, gold, sep='\t', flush=True)

            sum_lp += lp
            sum_lr += lr
            total += 1

    # Averages are undefined when nothing was evaluated; dividing anyway
    # would raise ZeroDivisionError and hide the actual cause.
    if total:
        print('Average LP =', sum_lp / total, file=sys.stderr)
        print('Average LR =', sum_lr / total, file=sys.stderr)
    else:
        print('No sentence was evaluated; average LP/LR are undefined', file=sys.stderr)
    print('Anzahl =', total, file=sys.stderr)
def correct_constitue(unbinary_tree, gold):
    """Count the nodes of the parsed tree that also occur in the gold tree.

    Every node is reduced to the pair ``(label, children_string)`` where
    ``children_string`` is the concatenation of ``str(child)`` over its
    children.  A node counts as correct when ``compute_eval_set(gold)``
    contains the same pair.

    Args:
        unbinary_tree: Root of the parsed tree (presumably already
            un-binarized by the caller).  The parameter name shadows the
            module-level function of the same name inside this body.
        gold: Root of the gold-standard tree.

    Returns:
        The number of nodes in the parsed tree whose pair is in the gold set.
    """
    eval_set = compute_eval_set(gold)
    agenda = [unbinary_tree]
    matches = 0
    while agenda:
        node = agenda.pop()
        children_string = "".join(str(child) for child in node.children)
        if (node.label, children_string) in eval_set:
            matches += 1
        agenda.extend(node.children)
    return matches
def compute_eval_set(tree):
    """Collect the ``(label, children_string)`` pairs of a tree.

    ``children_string`` is the concatenation of ``str(child)`` over a node's
    children.  Nodes whose children string is empty (in particular nodes
    without children) are left out.

    Args:
        tree: Root node; every node must expose ``label`` and ``children``.

    Returns:
        A set of ``(label, children_string)`` tuples, one per distinct pair.
    """
    eval_set = set()
    agenda = [tree]
    while agenda:
        node = agenda.pop()
        children_string = "".join(str(child) for child in node.children)
        if children_string:
            eval_set.add((node.label, children_string))
        agenda.extend(node.children)
    return eval_set
def unbinary_tree(tree):
    """Splice out every child node for which ``is_binary()`` is true, in place.

    For each node reachable from ``tree``, children reporting
    ``is_binary()`` are replaced by their own children (repeatedly, so
    nested such nodes disappear as well) while the left-to-right order is
    preserved.  The root itself is never removed.

    Args:
        tree: Root node; nodes must expose ``children`` and ``is_binary()``.
            presumably ``is_binary()`` marks auxiliary nodes introduced by
            grammar binarization -- verify against the tree class.

    Returns:
        None; ``children`` of the visited nodes are rewritten in place.
    """
    agenda = [tree]
    while agenda:
        node = agenda.pop()
        pending = list(node.children)
        flattened = []
        # ``pending`` is consumed from the right, so ``flattened`` is built
        # right-to-left and reversed once at the end.
        while pending:
            child = pending.pop()
            if child.is_binary():
                pending.extend(child.children)
            else:
                flattened.append(child)
        flattened.reverse()
        node.children = flattened
        agenda.extend(flattened)
if __name__ == '__main__':
    # Script entry point: collect the grammar files, beam size, length limit
    # and the treebank path from the command line, then run the evaluation.
    cli = ArgumentParser()
    cli.add_argument('--rules', type=str, default='rules.txt')
    cli.add_argument('--lexicon', type=str, default='lexicon.txt')
    cli.add_argument('--beam', type=int, default=100)
    cli.add_argument('--maxlen', type=int, default=25)
    cli.add_argument('trees')
    options = cli.parse_args()
    eval(options)