-
Notifications
You must be signed in to change notification settings - Fork 0
/
lang.go
95 lines (78 loc) · 1.39 KB
/
lang.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package lang
import (
"bufio"
"fmt"
"io"
"strings"
"github.com/wmentor/mcounter"
ngram "github.com/wmentor/qgram"
)
// Sentinel values returned by this package.
const (
// UnknownLang is the code Detect returns when it cannot attribute the
// input to any of the loaded languages.
UnknownLang string = "??"
)
// Package-level lookup state; both variables are assigned in init.
var (
// Langs lists the language codes whose "<code>.txt" data files are loaded
// by init.
Langs []string
// data maps each trimmed, non-blank line read from the language data
// files to a space-separated list of the language codes whose files
// contain that line. Detect looks entries up by the keys of the map
// returned from ngram.CalcMap.
data map[string]string
)
// init sets the list of supported language codes, allocates the lookup
// table, and loads the data file of every supported language into it.
func init() {
	Langs = []string{"am", "de", "el", "en", "es", "fr", "it", "ka", "ru"}
	data = make(map[string]string)

	for i := range Langs {
		loadLang(Langs[i])
	}
}
// loadLang reads the data file "<name>.txt" through the package-level fs and
// registers every non-blank line of it in data under the language code name.
//
// Each line is trimmed of surrounding whitespace and used as a key in data.
// The value stored for a key is a space-separated list of the language codes
// whose files contain that key; a code is added at most once per key, so
// repeated lines do not duplicate it.
//
// loadLang panics if the file cannot be opened, or if reading it fails with
// any error other than io.EOF, so that a partially loaded table is never
// used silently.
func loadLang(name string) {
	in, err := fs.Open(name + ".txt")
	if err != nil {
		panic("unknown data file: " + name)
	}
	defer in.Close()

	br := bufio.NewReader(in)
	for {
		line, err := br.ReadString('\n')

		// ReadString can return text together with an error (for example a
		// final line with no trailing newline), so consume the text first.
		if key := strings.TrimSpace(line); key != "" {
			if langs, ok := data[key]; !ok {
				data[key] = name
			} else if !langListed(langs, name) {
				data[key] = langs + " " + name
			}
		}

		if err == io.EOF {
			return
		}
		if err != nil {
			// A genuine read failure would otherwise look like end of file
			// and leave this language with an incomplete table.
			panic("reading data file " + name + ": " + err.Error())
		}
	}
}

// langListed reports whether name is one of the whitespace-separated codes
// in langs.
func langListed(langs, name string) bool {
	for _, cur := range strings.Fields(langs) {
		if cur == name {
			return true
		}
	}
	return false
}
// Detect reads text from in and returns the code of the language it most
// likely belongs to, or UnknownLang when nothing matches.
//
// The input is turned into a map of keys to counts by ngram.CalcMap. For
// every key that has an entry in data, each language code listed for that
// key is credited with the key's count. The top entry of the resulting
// counter, as returned by Slice(1, true), is the answer.
func Detect(in io.Reader) string {
	scores := mcounter.New()

	for gram, count := range ngram.CalcMap(in) {
		langs, known := data[gram]
		if !known {
			// Keys absent from the table contribute to no language.
			continue
		}
		for _, code := range strings.Fields(langs) {
			scores.Inc(code, uint64(count))
		}
	}

	if len(scores) > 0 {
		if best := scores.Slice(1, true); len(best) > 0 {
			return best[0]
		}
	}
	return UnknownLang
}
// Conflicts prints to standard output every key of data that is listed for
// more than one language, one per line in the form "<key> <codes>".
// Lines appear in map iteration order, which Go does not specify.
func Conflicts() {
	for key, langs := range data {
		if len(strings.Fields(langs)) < 2 {
			continue
		}
		fmt.Printf("%s %s\n", key, langs)
	}
}