-
Notifications
You must be signed in to change notification settings - Fork 0
/
mecab.rb
executable file
·66 lines (56 loc) · 1.27 KB
/
mecab.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env ruby
# coding: utf-8
require 'natto'
require 'csv'
# MeCab tokenizer whose per-node output is three comma-separated feature
# fields (indices 6, 0 and 1). The code below treats them as the word to look
# up, a part-of-speech tag and a part-of-speech sub-tag.
# NOTE(review): presumably base form / POS / POS subcategory in the IPA
# dictionary layout -- confirm against the MeCab dictionary in use.
nm = Natto::MeCab.new('-F%f[6],%f[0],%f[1]')

# Dictionary CSV given as the first command-line argument: the first column is
# the level (converted with to_i), the second column is the word.
# Builds word => level; a word listed more than once keeps its last level.
level_hash = CSV.read(ARGV[0]).each_with_object({}) do |(level, word), levels|
  levels[word] = level.to_i
end

# Occurrence count for every dictionary word seen in the input.
word_count_hash = Hash.new(0)
# Token totals per level; key 0 collects tokens that are not in the dictionary.
level_count_hash = Hash.new(0)

# Tokenize standard input line by line and tally the relevant tokens.
STDIN.each_line do |text|
  nm.enum_parse(text).each do |node|
    # Skip the end-of-sentence marker node.
    next if node.is_eos?

    base, pos, pos_detail = node.feature.split(",")
    next if base == ''

    # Only verbs and nouns other than proper nouns are counted.
    countable = pos == '動詞' || (pos == '名詞' && pos_detail != '固有名詞')
    next unless countable

    if level_hash[base]
      # Known word: counted per word here, folded into its level later.
      word_count_hash[base] += 1
    else
      # Unknown word: goes straight into level 0.
      level_count_hash[0] += 1
    end
  end
end
# Fold the per-word tallies of dictionary words into per-level token totals.
# Key 0 of level_count_hash already holds the tokens that were not found in
# the dictionary.
word_count_hash.each do |word, count|
  level_count_hash[level_hash[word]] += count
end

# Every counted token (known or unknown) forms the denominator.
total = level_count_hash.values.inject(0, :+)

# For each threshold 1..5, compute the percentage of tokens whose level is at
# or above that threshold. Level 0 is deliberately not reported.
#
# Counts are summed first and the percentage is taken once with integer
# arithmetic. Truncating each level's percentage before summing loses up to
# one point per level, and float division can land just below an exact
# integer percentage (e.g. 29 of 100) and truncate one too low.
result = (1..5).map do |threshold|
  at_or_above = (threshold..5).inject(0) do |acc, level|
    acc + level_count_hash[level]
  end
  # With no counted tokens at all, report 0 for every threshold.
  total.zero? ? 0 : at_or_above * 100 / total
end

# One line of output: the five cumulative percentages, comma-separated.
puts result.join(",")