-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
74 lines (61 loc) · 2.17 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from parse_nouns import parse_nouns
from parse_verbs import parse_verbs
import codecs
import xml.etree.ElementTree as ET
def writeOpening(filename):
    """Create (or truncate) *filename* and write the lexicon header line.

    The file is written as UTF-8 and contains exactly ``lexiconAncora{``
    followed by a newline once this function returns.

    Args:
        filename: Path of the lexicon file to start.
    """
    # The ``with`` block closes the handle on exit, so no explicit close()
    # call is needed.
    with codecs.open(filename, 'w', encoding="utf8") as fd:
        fd.write("lexiconAncora{\n")
def writeEnding(filename):
    """Append the closing ``}`` line of the lexicon to *filename*.

    The file is opened in append mode as UTF-8, so existing content is kept
    and the file is created if it does not exist yet.

    Args:
        filename: Path of the lexicon file to terminate.
    """
    # The ``with`` block closes the handle on exit, so no explicit close()
    # call is needed.
    with codecs.open(filename, 'a', encoding="utf8") as fd:
        fd.write("}\n")
def get_nouns():
    """Write the AnCora noun lexicon to ``UPF_AncoraDict_nouns.dic``.

    Every root returned by ``parse_nouns.getRoot()`` is passed to
    ``parse_nouns.parseXML()``, and the string form of each resulting sense
    is written between a ``lexiconAncora{`` header line and a closing ``}``
    line. The output file is created (or truncated) relative to the current
    working directory and is encoded as UTF-8.
    """
    filename = 'UPF_AncoraDict_nouns.dic'
    roots = parse_nouns.getRoot()
    # Open the file once, with an explicit encoding, instead of re-opening it
    # in append mode for every sense with the platform default encoding.
    with codecs.open(filename, 'w', encoding="utf8") as fd:
        fd.write("lexiconAncora{\n")
        for root in roots:
            for sense in parse_nouns.parseXML(root):
                # NOTE(review): parseXML is not expected to yield None, but
                # skipping it avoids writing the literal text "None".
                if sense is None:
                    continue
                text = str(sense)
                # Only a Python 2 byte string needs decoding; a Python 3 str
                # has no .decode() and is already text.
                if isinstance(text, bytes):
                    text = text.decode("utf-8")
                fd.write(text)
        fd.write('}\n')
def get_verbs(path):
    """Write the AnCora verb lexicon for the resources found under *path*.

    When *path* contains ``'CAT'`` the Catalan resources
    (``ancoranet-ca.xml`` and ``ancora-verb-ca/``) are read and the output
    file is ``UPF_AncoraDict_verbs_CA.dic``; otherwise the Spanish resources
    (``ancoranet-es.xml`` and ``ancora-verb-es/``) are read and the output
    file is ``UPF_AncoraDict_verbs.dic``. The output is written relative to
    the current working directory, encoded as UTF-8, with the string form of
    every non-None sense placed between a ``lexiconAncora{`` header line and
    a closing ``}`` line.

    Args:
        path: Directory prefix of the original resources. File and directory
            names are appended to it verbatim, so it must end with a path
            separator.
    """
    if 'CAT' in path:
        language = '_CA'
        ancoranet_path = path + 'ancoranet-ca.xml'
        files_path = path + 'ancora-verb-ca/'
    else:
        language = ''
        ancoranet_path = path + 'ancoranet-es.xml'
        files_path = path + 'ancora-verb-es/'
    filename = 'UPF_AncoraDict_verbs{}.dic'.format(language)

    roots = parse_verbs.getRoot(files_path)
    ancoranet_root = ET.parse(ancoranet_path).getroot()
    # Named ancora_info rather than ``map`` so the builtin is not shadowed.
    ancora_info = parse_verbs.getAncoraInfo(ancoranet_root)

    # Open the file once, with an explicit encoding, instead of re-opening it
    # in append mode for every sense with the platform default encoding.
    with codecs.open(filename, 'w', encoding="utf8") as fd:
        fd.write("lexiconAncora{\n")
        for root_lex in roots:
            for sense in parse_verbs.getSenses(root_lex, ancora_info):
                if sense is None:
                    continue
                text = str(sense)
                # Only a Python 2 byte string needs decoding; checking the
                # type replaces the former bare ``except`` fallback, which
                # also hid unrelated write errors.
                if isinstance(text, bytes):
                    text = text.decode("utf-8")
                fd.write(text)
        fd.write("}\n")
if __name__ == '__main__':
    # Interactive entry point: ask for the language and for which lexicon
    # (noun or verb) to generate.
    language = input("Enter CAT or ES: ")
    path = 'Resources/OriginalFiles/{}/'.format(language)
    to_process = input("Enter noun or verb: ")
    if to_process == 'noun':
        if language == 'CAT':
            # Report the unsupported combination and stop, rather than
            # printing the error and generating a noun lexicon anyway.
            print('[ERROR] Nouns not available in catalan.')
        else:
            get_nouns()
    elif to_process == 'verb':
        get_verbs(path)