-
Notifications
You must be signed in to change notification settings - Fork 1
/
fuzzy.go
36 lines (29 loc) · 953 Bytes
/
fuzzy.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// Copyright © 2013-2018 Pierre Neidhardt <[email protected]>
// Use of this file is governed by the license that can be found in LICENSE.
package main
import (
"regexp"
"strings"
"github.com/ambrevar/damerau"
)
// reNorm matches the text that stringNorm deletes: a run of zeros that starts
// at a word boundary, or any single character that is neither a Unicode letter
// (\pL) nor a Unicode number (\pN).
var reNorm = regexp.MustCompile(`\b0+|[^\pL\pN]`)
// stringNorm returns a normalized form of s meant for comparisons: every match
// of reNorm (padding zeros in front of numbers, and anything that is not a
// letter or a number, such as punctuation and spaces) is deleted, and the
// remainder is converted to lowercase. Normalizing both operands first makes
// string relations more relevant.
func stringNorm(s string) string {
	stripped := reNorm.ReplaceAllString(s, "")
	return strings.ToLower(stripped)
}
// stringRel returns a similarity score for a and b, computed as
//
//	1 - distance(a, b) / longest
//
// where distance is the Damerau-Levenshtein distance reported by the damerau
// package and longest is the rune count of the longer of the two strings.
// Identical strings therefore score 1, and the score drops towards 0 as the
// strings have less in common. Two empty strings are treated as identical and
// score 1, which also avoids a division by zero.
func stringRel(a, b string) float64 {
	// Lengths are measured in runes, not bytes.
	longest := len([]rune(a))
	if n := len([]rune(b)); n > longest {
		longest = n
	}

	// Both strings are empty: nothing to compare.
	if longest == 0 {
		return 1
	}

	dist := damerau.DamerauLevenshteinDistance(a, b)
	ratio := float64(dist) / float64(longest)
	return 1 - ratio
}