diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fbfb468..01c24d7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,16 +4,16 @@ jobs: test: strategy: matrix: - go-version: [1.19.x, 1.20.x] + go-version: [1.22.x, 1.23.x] os: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.os }} steps: - name: Install Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Lint run: make lint - name: Test diff --git a/go.mod b/go.mod index 4fcfe43..f71f642 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,8 @@ module github.com/agnivade/levenshtein -go 1.13 +go 1.21 require ( github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 - github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 + github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 ) diff --git a/go.sum b/go.sum index 74d92aa..4cd4723 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,4 @@ github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE= -github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g= -github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= +github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo= +github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= diff --git a/levenshtein.go b/levenshtein.go index f727a66..861f409 100644 --- a/levenshtein.go +++ b/levenshtein.go @@ -41,6 +41,25 @@ func ComputeDistance(a, b string) int { if len(s1) > len(s2) { s1, s2 = s2, s1 } + + // remove trailing identical runes. + for i := 0; i < len(s1); i++ { + if s1[len(s1)-1-i] != s2[len(s2)-1-i] { + s1 = s1[:len(s1)-i] + s2 = s2[:len(s2)-i] + break + } + } + + // Remove leading identical runes. + for i := 0; i < len(s1); i++ { + if s1[i] != s2[i] { + s1 = s1[i:] + s2 = s2[i:] + break + } + } + lenS1 := len(s1) lenS2 := len(s2) @@ -71,7 +90,7 @@ func ComputeDistance(a, b string) int { for j := 1; j <= lenS1; j++ { current := x[j-1] // match if s2[i-1] != s1[j-1] { - current = min(min(x[j-1]+1, prev+1), x[j]+1) + current = min(x[j-1]+1, prev+1, x[j]+1) } x[j-1] = prev prev = current @@ -80,10 +99,3 @@ func ComputeDistance(a, b string) int { } return int(x[lenS1]) } - -func min(a, b uint16) uint16 { - if a < b { - return a - } - return b -} diff --git a/levenshtein_test.go b/levenshtein_test.go index dd296d3..dd3607a 100644 --- a/levenshtein_test.go +++ b/levenshtein_test.go @@ -66,13 +66,35 @@ func BenchmarkSimple(b *testing.B) { name string }{ // ASCII - {"levenshtein", "frankenstein", "ASCII"}, + {a: "levenshtein", b: "frankenstein", name: "ASCII"}, // Testing acutes and umlauts - {"resumé and café", "resumés and cafés", "French"}, - {"Hafþór Júlíus Björnsson", "Hafþor Julius Bjornsson", "Nordic"}, - {"a very long string that is meant to exceed", "another very long string that is meant to exceed", "long string"}, + {a: "resumé and café", b: "resumés and cafés", name: "French"}, + {a: "Hafþór Júlíus Björnsson", b: "Hafþor Julius Bjornsson", name: "Nordic"}, + + // Long strings + { + a: "a very long string that is meant to exceed", + b: "another very long string that is meant to exceed", + name: "Long lead", + }, + { + a: "a very long string with a word in the middle that is different", + b: "a very long string with some text in the middle that is different", + name: "Long middle", + }, + { + a: "a very long string with some text at the end that is not the same", + b: "a very long string with some text at the end that is very different", + name: "Long trail", + }, + { + a: "+a very long string with different leading and trailing characters+", + b: "-a very long string with different leading and trailing characters-", + name: "Long diff", + }, + // Only 2 characters are less in the 2nd string - {"།་གམ་འས་པ་་མ།", "།་གམའས་པ་་མ", "Tibetan"}, + {a: "།་གམ་འས་པ་་མ།", b: "།་གམའས་པ་་མ", name: "Tibetan"}, } tmp := 0 for _, test := range tests {