-
Notifications
You must be signed in to change notification settings - Fork 0
/
20.py
118 lines (94 loc) · 3.2 KB
/
20.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#! /usr/bin/python2.7
# -*- coding: utf-8 -*-
from time import time
from konlpy import tag
from konlpy.corpus import kolaw
from konlpy.utils import pprint
def tagging(tagger, text):
    """Run the tagger class named ``tagger`` over ``text``.

    The class is looked up by name on the imported ``tag`` module,
    instantiated without arguments, and its ``pos`` method is applied to
    ``text``. The input is echoed to stdout before tagging.

    Returns whatever ``pos`` returns. If anything raises along the way
    (unknown tagger name, tagger failure, ...), the error is printed and an
    empty list is returned instead.
    """
    try:
        print (text)
        tagger_cls = getattr(tag, tagger)
        return tagger_cls().pos(text)
    except Exception as err:
        # Best effort: report the failure and fall through to the empty result.
        print ("Uhoh,", err)
    return []
def measure_time(taggers, mult=6):
    """Time each tagger on text prefixes of 1, 10, ..., 10**(mult-1) characters.

    Args:
        taggers: list of tagger class names, as accepted by ``tagging``.
        mult: how many prefix lengths (successive powers of ten) to measure.
            With ``mult <= 0`` only the header row is returned.

    Returns:
        A list of rows. The first row is the header ``['n'] + taggers``; every
        following row is ``[prefix_length, seconds, seconds, ...]`` with one
        elapsed time per tagger, in the same order as ``taggers``. A tagger
        that fails is still timed, because ``tagging`` swallows its errors.
    """
    sentences = [u'아버지가방에들어가신다',  # spacing
                 u'나는 밥을 먹는다', u'하늘을 나는 자동차',  # disambiguation
                 u'아이폰 기다리다 지쳐 애플공홈에서 언락폰질러버렸다 6+ 128기가실버ㅋ']  # slang

    # The benchmark is sized in characters, so the corpus must be one string:
    # slicing the list of sentences would hand the taggers a list, not text.
    # Repeat it until the longest requested prefix really has that length.
    base = u' '.join(sentences)
    longest = 10 ** (mult - 1) if mult > 0 else 0
    doc = base * (longest // len(base) + 1)

    data = [['n'] + taggers]
    for i in range(mult):
        doclen = 10 ** i
        diffs = [doclen]
        for tagger in taggers:
            started = time()
            tagging(tagger, doc[:doclen])
            diffs.append(time() - started)
        data.append(diffs)
        # Blank separator line between prefix lengths; a bare ``print`` is a
        # no-op expression on Python 3.
        print ("")
    return data
def measure_accuracy(taggers, text):
    """Tag ``text`` with every tagger and collect the results as table rows.

    For each name in ``taggers`` the name is printed, ``tagging`` is run on
    ``text`` and its raw result is pretty-printed.

    Returns:
        One row per tagger: the tagger name followed by each item of the
        tagging result joined with ``' / '`` (presumably (morpheme, tag)
        pairs -- confirm against the tagger's ``pos`` output).
    """
    rows = []
    for name in taggers:
        print (name)
        tagged = tagging(name, text)
        pprint(tagged)
        row = [name]
        row.extend(' / '.join(item) for item in tagged)
        rows.append(row)
    return rows
def plot(result):
    """Plot timing series on a log x-axis, save to ``images/time.png``, show.

    Args:
        result: sequence whose first item holds the x values and whose
            remaining items each hold one series of y values. When it is
            ``None`` or empty, the data is loaded from ``morph.csv`` instead
            (first row skipped, then transposed so columns become series).
    """
    import os

    import matplotlib
    matplotlib.use('TkAgg')
    from matplotlib import pylab as pl
    import numpy as np

    # Explicit emptiness test: ``not result`` raises for a NumPy array with
    # more than one element.
    if result is None or len(result) == 0:
        # Call NumPy directly; ``scipy.loadtxt`` was only a deprecated alias.
        result = np.loadtxt('morph.csv', delimiter=',', skiprows=1).T

    x, y = result[0], result[1:]
    for series in y:
        pl.plot(x, series)
    pl.xlabel('Number of characters')
    pl.ylabel('Time (sec)')
    pl.xscale('log')
    pl.grid(True)

    # savefig does not create missing directories.
    if not os.path.isdir('images'):
        os.makedirs('images')
    pl.savefig("images/time.png")
    pl.show()
# Script entry point: benchmark every tagger exposed by ``konlpy.tag``, dump
# the timings and the per-example tagging tables to CSV, then plot the timings.
if __name__=='__main__':
    import csv
    try:
        from itertools import zip_longest
    except ImportError:  # Python 2 names it izip_longest
        from itertools import izip_longest as zip_longest

    PLOT = True
    MULT = 6

    examples = [u'아버지가방에들어가신다',  # spacing
                u'나는 밥을 먹는다', u'하늘을 나는 자동차',  # disambiguation
                u'아이폰 기다리다 지쳐 애플공홈에서 언락폰질러버렸다 6+ 128기가실버ㅋ']  # slang

    # Every name in ``tag`` that starts with an uppercase letter is treated
    # as a tagger class.
    taggers = [t for t in dir(tag) if t[0].isupper()]
    print ("THIS IS TAGGERS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
    print ("LOOK AT THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
    print (taggers)

    # Time
    data = measure_time(taggers, mult=MULT)
    with open('morph.csv', 'w') as f:
        # csv.writer quotes cells containing commas or quotes; '\n' keeps the
        # line endings the file had before.
        csv.writer(f, lineterminator='\n').writerows(data)

    # Accuracy
    for i, example in enumerate(examples):
        result = measure_accuracy(taggers, example)
        print (result)
        # Transpose so each tagger becomes a column. Taggers emit different
        # numbers of morphemes, so pad the short ones with '' rather than
        # truncating every column to the shortest (what multi-iterable map()
        # does on Python 3).
        result = [list(row) for row in zip_longest(*result, fillvalue='')]
        with open('morph-%s.csv' % i, 'w') as f:
            csv.writer(f, lineterminator='\n').writerows(result)

    # Plot
    if PLOT:
        # plot() expects the x values first and then one series per tagger,
        # i.e. the timing rows (without the header) transposed -- not the
        # accuracy table of the last example.
        timings = [list(column) for column in zip(*data[1:])]
        print ("---------------------------------")
        print (timings)
        print ("---------------------------------")
        plot(timings)
        print ("Plotting is done!")