-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdictionary.go
72 lines (62 loc) · 1.51 KB
/
dictionary.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
package main
import (
"bufio"
_ "embed"
"io"
"os"
"strings"
"unicode"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// defaultDictionary holds the contents of the embedded file
// dictionary/american-english-large; NewDictionary reads its word list from it.
//
//go:embed dictionary/american-english-large
var defaultDictionary string
// normalizer is the transform chain used by removeAccents: it decomposes text
// (NFD), removes every rune in the unicode.Mn category, and recomposes the
// result (NFC).
// NOTE(review): this is a single package-level transformer value shared by all
// callers — confirm it is never used from multiple goroutines at once.
var normalizer = transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
// removeAccents runs str through the package-level normalizer and returns the
// transformed string. If the transform reports an error, the empty string is
// returned together with that error.
func removeAccents(str string) (string, error) {
	stripped, _, transformErr := transform.String(normalizer, str)
	if transformErr != nil {
		return "", transformErr
	}

	return stripped, nil
}
// Dictionary is a collection of words. Values are built by NewDictionary,
// NewDictionaryFromPath, or NewDictionaryFromReader.
type Dictionary struct {
// words is the word list exposed through Words.
words []string
}
// NewDictionary builds a Dictionary from the word list embedded in the binary
// (defaultDictionary). Any error from NewDictionaryFromReader is returned
// unchanged.
func NewDictionary() (Dictionary, error) {
	embedded := strings.NewReader(defaultDictionary)

	return NewDictionaryFromReader(embedded)
}
// NewDictionaryFromPath opens the file at path and builds a Dictionary from
// its contents via NewDictionaryFromReader. The file is closed before the
// function returns. If the file cannot be opened, an empty Dictionary and the
// os.Open error are returned.
func NewDictionaryFromPath(path string) (Dictionary, error) {
	f, openErr := os.Open(path)
	if openErr != nil {
		return Dictionary{}, openErr
	}
	defer f.Close()

	return NewDictionaryFromReader(f)
}
// NewDictionaryFromReader builds a Dictionary from the word list in reader,
// treating each line as one candidate word.
//
// Every line is passed through removeAccents, which allows matching on words
// like "éclair". Lines shorter than four letters are skipped, and a word that
// appears more than once is kept only the first time, so the result preserves
// first-seen order.
//
// An empty Dictionary and a non-nil error are returned if accent removal
// fails or if reading from reader fails (this includes a line that exceeds
// bufio.Scanner's token size limit).
func NewDictionaryFromReader(reader io.Reader) (Dictionary, error) {
	// Set of words already collected; used only for membership checks.
	seen := make(map[string]struct{})
	words := []string{}

	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		line, err := removeAccents(scanner.Text())
		if err != nil {
			return Dictionary{}, err
		}

		// Spelling Bee words must be 4 letters or more. Count runes, not
		// bytes, so a multi-byte letter that survives accent removal is
		// counted as a single letter.
		if len([]rune(line)) < 4 {
			continue
		}

		// Using a set removes any duplicates.
		if _, dup := seen[line]; dup {
			continue
		}
		seen[line] = struct{}{}
		words = append(words, line)
	}

	// Scan returns false both at end of input and on failure; Err is the only
	// way to tell a complete read from a truncated one.
	if err := scanner.Err(); err != nil {
		return Dictionary{}, err
	}

	return Dictionary{words}, nil
}
// Words returns the dictionary's word list. The slice is returned as stored,
// without copying.
func (d Dictionary) Words() []string {
	return d.words
}