-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
dddd06f
commit 20eb6f6
Showing
5 changed files
with
146 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import * as _ from 'radashi' | ||
import { bench } from 'vitest' | ||
|
||
// Benchmark suite for `similarity`.
describe('similarity', () => {
  // Two 100-character inputs that only agree at every other position:
  // "hhhh…" versus "haha…".
  const repeatedH = 'h'.repeat(100)
  const alternatingHa = 'ha'.repeat(50)

  bench('with 50% similar characters', () => {
    _.similarity(repeatedH, alternatingHa)
  })
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
--- | ||
title: similarity | ||
description: Compare two strings and return a similarity score | ||
--- | ||
|
||
### Usage | ||
|
||
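Calculates the Levenshtein distance between two strings: the minimum number of single-character insertions, deletions, or substitutions needed to turn one string into the other. Identical strings return `0`, and larger values mean the strings are less alike. The order of the arguments does not affect the result. | ||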
|
||
```ts | ||
import * as _ from 'radashi' | ||
|
||
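_.similarity('abc', 'abc') // => 0 | ||
_.similarity('abc', 'axc') // => 1 | ||
_.similarity('kitten', 'sitting') // => 3 | ||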
``` | ||
|
||
https://en.wikipedia.org/wiki/Levenshtein_distance |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/**
 * Calculate the Levenshtein distance between two strings, i.e. the
 * minimum number of single-character insertions, deletions, and
 * substitutions needed to turn one string into the other.
 *
 * Despite the name, the result is a *distance*: `0` means the strings
 * are identical and larger numbers mean they are less alike.
 *
 * The argument order is unimportant. The algorithm always returns the
 * same result regardless of the order of the arguments.
 *
 * Characters are compared by UTF-16 code unit, so a character outside
 * the Basic Multilingual Plane (a surrogate pair) counts as two units.
 *
 * Only a single row of the distance matrix is kept in memory, so the
 * extra space used is proportional to the shorter of the two inputs
 * (after the common prefix and suffix have been stripped).
 *
 * Adapted from
 * [@fabiospampinato/tiny-levenshtein](https://github.com/fabiospampinato/tiny-levenshtein)
 * with ❤️.
 *
 * @see https://radashi-org.github.io/reference/string/similarity
 * @param str1 - The first string to compare.
 * @param str2 - The second string to compare.
 * @returns The number of single-character edits separating the strings.
 * @example
 * ```ts
 * similarity('abc', 'abc') // 0
 * similarity('a', 'b') // 1
 * similarity('ab', 'ac') // 1
 * similarity('ac', 'bc') // 1
 * similarity('abc', 'axc') // 1
 * similarity('kitten', 'sitting') // 3
 * ```
 */
export function similarity(str1: string, str2: string): number {
  // Early return if strings are identical
  if (str1 === str2) {
    return 0
  }

  // Find common prefix and suffix. Neither can contribute to the
  // distance, so they are excluded from the expensive part below.
  let start = 0
  let end1 = str1.length - 1
  let end2 = str2.length - 1

  while (start <= end1 && start <= end2 && str1[start] === str2[start]) {
    start++
  }

  while (end1 >= start && end2 >= start && str1[end1] === str2[end2]) {
    end1--
    end2--
  }

  // Calculate lengths of trimmed strings
  const length1 = end1 - start + 1
  const length2 = end2 - start + 1

  // Handle cases where one trimmed string is empty: every remaining
  // character of the other one is an insertion (or deletion).
  if (length1 === 0) {
    return length2
  }
  if (length2 === 0) {
    return length1
  }

  // The distance is symmetric, so let the shorter string define the
  // row width to keep the working buffer as small as possible.
  const swapped = length1 < length2
  const longer = swapped ? str2 : str1
  const shorter = swapped ? str1 : str2
  const longerLength = swapped ? length2 : length1
  const shorterLength = swapped ? length1 : length2

  // `row[x]` holds the distance between the first `y` characters of
  // `longer` and the first `x` characters of `shorter`. It starts as
  // the row for `y = 0`, where the distance is simply `x` insertions.
  const row = Array.from(
    { length: shorterLength + 1 },
    (_unused, index) => index,
  )

  for (let y = 1; y <= longerLength; y++) {
    const longerCode = longer.charCodeAt(start + y - 1)

    // Value of the previous row at column `x - 1`, saved before that
    // cell is overwritten with the current row's value.
    let diagonal = row[0]
    row[0] = y

    for (let x = 1; x <= shorterLength; x++) {
      // Still the previous row's value at this point.
      const above = row[x]
      const substitutionCost =
        longerCode === shorter.charCodeAt(start + x - 1) ? 0 : 1

      row[x] = Math.min(
        // Cost of a deletion.
        above + 1,
        // Cost of an insertion.
        row[x - 1] + 1,
        // Cost of a substitution.
        diagonal + substitutionCost,
      )
      diagonal = above
    }
  }

  // Return the Levenshtein distance
  return row[shorterLength]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import * as _ from 'radashi' | ||
|
||
// Unit tests for `similarity`, which reports the edit distance between
// two strings.
describe('similarity', () => {
  // https://github.com/fabiospampinato/tiny-levenshtein/blob/master/test/index.js
  test('returns the distance between two strings', () => {
    // Each entry is [first string, second string, expected distance].
    const distanceCases: [string, string, number][] = [
      ['abc', 'abc', 0],
      ['a', 'b', 1],
      ['ab', 'ac', 1],
      ['ac', 'bc', 1],
      ['abc', 'axc', 1],
      ['kitten', 'sitting', 3],
      ['xabxcdxxefxgx', '1ab2cd34ef5g6', 6],
      ['cat', 'cow', 2],
      ['xabxcdxxefxgx', 'abcdefg', 6],
      ['javawasneat', 'scalaisgreat', 7],
      ['example', 'samples', 3],
      ['sturgeon', 'urgently', 6],
      ['levenshtein', 'frankenstein', 6],
      ['distance', 'difference', 5],
      ['因為我是中國人所以我會說中文', '因為我是英國人所以我會說英文', 2],
    ]

    for (const [first, second, expected] of distanceCases) {
      expect(_.similarity(first, second)).toBe(expected)
    }
  })

  test('containment', () => {
    // One string is a prefix of the other, checked in both argument orders.
    const containmentCases: [string, string, number][] = [
      ['abababab', 'ab', 6],
      ['ab', 'abababab', 6],
      ['abc', 'ab', 1],
      ['ab', 'abc', 1],
    ]

    for (const [first, second, expected] of containmentCases) {
      expect(_.similarity(first, second)).toBe(expected)
    }
  })
})