forked from mikahama/uralicNLP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_uralicnlp.py
121 lines (88 loc) · 3.58 KB
/
test_uralicnlp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#encoding: utf-8
from uralicNLP import uralicApi
from uralicNLP.cg3 import Cg3, Cg3Pipe
from uralicNLP.translate import *
from uralicNLP import dependency
from uralicNLP.ud_tools import UD_collection
from uralicNLP.dictionary_backends import MongoDictionary
import re
from mikatools import *
# Smoke call: invoke get_all_forms for the word "kissa" with the arguments
# "N" and "fin" (presumably part of speech and language code -- confirm
# against the uralicApi signature). The return value is discarded, so this
# line only checks that the call completes without raising.
uralicApi.get_all_forms("kissa", "N", "fin")
#uralicApi.get_transducer("spa", analyzer=True).lookup_optimize()
#print(uralicApi.analyze("hola", "spa"))
#print(type(uralicApi.get_transducer("spa", analyzer=True)))
#print()
#print(uralicApi.supported_languages())
#uralicApi.download("fin")
"""
print(uralicApi.analyze("voita", "fin"))
print(uralicApi.analyze("voita", "fin", descrpitive=False))
print(uralicApi.analyze("voita", "fin"))
print(uralicApi.analyze("voita", "fin", descrpitive=False))
print(uralicApi.generate("käsi+N+Sg+Par", "fin"))
print(uralicApi.generate("käsi+N+Sg+Par", "fin"))
print(uralicApi.generate("käsi+N+Sg+Par", "fin", descrpitive=True))
print(uralicApi.generate("käsi+N+Sg+Par", "fin", descrpitive=True))
print(uralicApi.generate("käsi+N+Sg+Par", "fin", dictionary_forms=False))
print(uralicApi.generate("käsi+N+Sg+Par", "fin", dictionary_forms=False))
print(uralicApi.generate("käsi+N+Sg+Par", "deu"))
#print(uralicApi.dictionary_search("car", "sms"))
print(uralicApi.lemmatize("voita", "fin", descrpitive=True))
#uralicApi.download("kpv")
"""
"""
cg = Cg3("fin")
print(cg.disambiguate(["Kissa","voi","nauraa", "."], descrpitive=True))
cg = Cg3("kpv")
print(cg.disambiguate("театрӧ пыран абонемент".split(" ")))
"""
#print (uralicApi.lemmatize("livsmedel", "swe",force_local=True, word_boundaries=True))
"""
for w in ["الكتاب", "الكاتب", "الميكا", "المكتوب", "كلب", "كلبين", "كلاب", "كلبتي", "كلبي", "قلب", "قلبين"]:
print("\n\n" +w)
print(uralicApi.analyze(w,"ara"))
print(uralicApi.lemmatize(w,"ara"))
print(uralicApi.generate("+noun+humanكاتب+masc+pl@","ara"))
str = "+adj{كَلْبِيّ}+masc+sg@"
print(re.findall(r"[ء-ي]+", str))
"""
"""
print(uralicApi.analyze("kissa", "fin"))
print(uralicApi.analyze("on", ["fin","olo"]))
print(uralicApi.analyze("on", ["fin","olo"], language_flags=True))
cg = Cg3("fin", morphology_languages=["fin", "olo"])
print(cg.disambiguate(["Kissa","on","kotona", "."], language_flags=True))
"""
"""
cg = Cg3("fin")
cg2 = Cg3("rus")
cg_pipe = Cg3Pipe(cg, cg2)
print(cg_pipe.disambiguate(["Kissa","on","kotona", "."]))
"""
#print(uralicApi.dictionary_lemmas("sms", group_by_pos=True))
#print(uralicApi.dictionary_search("car", "sms",backend=MongoDictionary))
#print(uralicApi.dictionary_search("byrokratti", "sms",backend=MongoDictionary))
#print(uralicApi.dictionary_search("tavallinen ihminen", "sms",backend=MongoDictionary))
"""
print(uralicApi.analyze("cats", "eng"))
print(uralicApi.generate("cat[N]+N+PL", "eng"))
print(uralicApi.lemmatize("cats", "eng"))
"""
"""
translator = ApertiumGiellateknoTranslator()
print(translator.translate("kissa juoksee kovaa", "fin","sme"))
translator = ApertiumStableTranslator()
print(translator.translate("el gato corre rápido", "spa","cat"))
"""
"""
ud = dependency.parse_text("kissa nauroi kovaa\nLehmä lauloi ainiaan", "fin",url="http://localhost:9877")
for sentence in ud:
for word in sentence:
print word.pos, word.lemma, word.get_attribute("deprel")
print "---"
"""
# Load the CoNLL-U test fixture and, for every sentence that contains a word
# with the lemma "olla", print the "form" attribute of the first such word.
# NOTE(review): the handle returned by open_read() is never closed explicitly;
# a with-block would be safe only if UD_collection reads the file eagerly in
# its constructor -- confirm before changing.
ud = UD_collection(open_read("test_data/fi_test.conllu"))
sentences = ud.find_sentences(query={"lemma": "olla"})  # all sentences containing the lemma "olla"
for sentence in sentences:
    # find() is indexed below, so it yields the matching words of this
    # sentence; only the first match is reported.
    word = sentence.find(query={"lemma": "olla"})
    print(word[0].get_attribute("form"))