Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance and cleanup code #29

Merged
merged 5 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ jobs:
test:
strategy:
matrix:
go-version: [1.19.x, 1.20.x]
go-version: [1.22.x, 1.23.x]
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Install Go
uses: actions/setup-go@v2
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go-version }}
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v4
- name: Lint
run: make lint
- name: Test
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
module github.com/agnivade/levenshtein

go 1.13
go 1.21

require (
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54
)
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g=
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo=
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
28 changes: 20 additions & 8 deletions levenshtein.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,25 @@ func ComputeDistance(a, b string) int {
if len(s1) > len(s2) {
s1, s2 = s2, s1
}

// Remove trailing identical runes.
for i := 0; i < len(s1); i++ {
if s1[len(s1)-1-i] != s2[len(s2)-1-i] {
s1 = s1[:len(s1)-i]
s2 = s2[:len(s2)-i]
break
}
}

// Remove leading identical runes.
for i := 0; i < len(s1); i++ {
if s1[i] != s2[i] {
s1 = s1[i:]
s2 = s2[i:]
break
}
}
agnivade marked this conversation as resolved.
Show resolved Hide resolved

lenS1 := len(s1)
lenS2 := len(s2)

Expand Down Expand Up @@ -71,7 +90,7 @@ func ComputeDistance(a, b string) int {
for j := 1; j <= lenS1; j++ {
current := x[j-1] // match
if s2[i-1] != s1[j-1] {
current = min(min(x[j-1]+1, prev+1), x[j]+1)
current = min(x[j-1]+1, prev+1, x[j]+1)
}
x[j-1] = prev
prev = current
Expand All @@ -80,10 +99,3 @@ func ComputeDistance(a, b string) int {
}
return int(x[lenS1])
}

// min reports the smaller of the two uint16 operands. When both are
// equal, that shared value is returned.
func min(a, b uint16) uint16 {
	smallest := b
	if a < b {
		smallest = a
	}
	return smallest
}
32 changes: 27 additions & 5 deletions levenshtein_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,35 @@ func BenchmarkSimple(b *testing.B) {
name string
}{
// ASCII
{"levenshtein", "frankenstein", "ASCII"},
{a: "levenshtein", b: "frankenstein", name: "ASCII"},
// Testing acutes and umlauts
{"resumé and café", "resumés and cafés", "French"},
{"Hafþór Júlíus Björnsson", "Hafþor Julius Bjornsson", "Nordic"},
{"a very long string that is meant to exceed", "another very long string that is meant to exceed", "long string"},
{a: "resumé and café", b: "resumés and cafés", name: "French"},
{a: "Hafþór Júlíus Björnsson", b: "Hafþor Julius Bjornsson", name: "Nordic"},

// Long strings
{
a: "a very long string that is meant to exceed",
b: "another very long string that is meant to exceed",
name: "Long lead",
},
{
a: "a very long string with a word in the middle that is different",
b: "a very long string with some text in the middle that is different",
name: "Long middle",
},
{
a: "a very long string with some text at the end that is not the same",
b: "a very long string with some text at the end that is very different",
name: "Long trail",
},
{
a: "+a very long string with different leading and trailing characters+",
b: "-a very long string with different leading and trailing characters-",
name: "Long diff",
},

// Only 2 characters are less in the 2nd string
{"།་གམ་འས་པ་་མ།", "།་གམའས་པ་་མ", "Tibetan"},
{a: "།་གམ་འས་པ་་མ།", b: "།་གམའས་པ་་མ", name: "Tibetan"},
}
tmp := 0
for _, test := range tests {
Expand Down
Loading