-
Notifications
You must be signed in to change notification settings - Fork 4
/
fuzmatch.go
50 lines (45 loc) · 1.69 KB
/
fuzmatch.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
package fuzmatch
import (
"strings"
"unicode/utf8"
)
//Ratio allow you to calculate the pourcentage of variance between two strings
//if the two strings are equals the function returns 1.
func Ratio(s1, s2 string) int {
if s1 == "" || s2 == "" {
return 0
}
len := utf8.RuneCountInString(s1) + utf8.RuneCountInString(s2)
dist := LevenshteinDistance(processString(s1), processString(s2))
return int((1 - (float32(dist) / float32(len))) * 100)
}
//PartialRatio allow you to calculate the "best partial" ratio. It takes
//the smaller string and we compare the smaller with a partial string from
//the bigger one. Could be useful if you have to compare two strings with
//very different length
func PartialRatio(s1, s2 string) int {
min, max := minMax(s1, s2)
var bestRatio int
for i := 0; i < len(max)-len(min)+1; i++ {
Ratio := Ratio(min, max[i:i+len(min)])
if Ratio > bestRatio {
bestRatio = Ratio
}
}
return bestRatio
}
//TokenSortRatio allow you to compare two strings "ordered" alphabetically
//so if you have two strings not ordered. This function could be useful.
func TokenSortRatio(s1, s2 string) int {
s1Sort, s2Sort := sortString(strings.Split(s1, " ")), sortString(strings.Split(s2, " "))
return Ratio(s1Sort, s2Sort)
}
// TokenSetRatio splits both inputs into tokens, hands them to
// sortListTokenSetRatio (defined elsewhere in this package) to obtain three
// token lists — per the original description, the tokens common to both
// inputs and the remainder of each input — and then scores three strings
// against each other with Ratio:
//
//   - the common tokens,
//   - the common tokens followed by the first remainder,
//   - the common tokens followed by the second remainder.
//
// The highest of the three pairwise scores is returned.
//
// Tokens are split with strings.Fields: any run of whitespace separates
// tokens and no empty tokens are produced. If either input contains no tokens
// the result is 0, which is also what Ratio returns for empty input.
func TokenSetRatio(s1, s2 string) int {
	tokens1, tokens2 := strings.Fields(s1), strings.Fields(s2)
	if len(tokens1) == 0 || len(tokens2) == 0 {
		return 0
	}

	common, rest1, rest2 := sortListTokenSetRatio(tokens1, tokens2)

	// Each combined list is joined into a string right away, so the result
	// does not depend on whether sumSlices returns slices that share storage.
	commonStr := strings.Join(common, " ")
	withRest1 := strings.Join(sumSlices(common, rest1), " ")
	withRest2 := strings.Join(sumSlices(common, rest2), " ")

	return maximum(
		Ratio(commonStr, withRest1),
		Ratio(commonStr, withRest2),
		Ratio(withRest1, withRest2),
	)
}