Skip to content

Commit

Permalink
Merge pull request #90 from life4/drop-abydos
Browse files Browse the repository at this point in the history
Drop abydos
  • Loading branch information
orsinium authored Sep 28, 2023
2 parents 31fe59c + e66149f commit 518197a
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 61 deletions.
40 changes: 20 additions & 20 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,26 +45,26 @@ jobs:
repo-token: ${{ github.token }}
- run: task pytest-pure

# pytest-external:
# runs-on: ubuntu-latest
# strategy:
# fail-fast: false
# matrix:
# python-version:
# - "3.8"
# - "3.9"
# - "3.10"
# - "3.11"
# # - "3.12.0-rc.1"
# steps:
# - uses: actions/checkout@v3
# - uses: actions/setup-python@v4
# with:
# python-version: ${{ matrix.python-version }}
# - uses: arduino/setup-task@v1
# with:
# repo-token: ${{ github.token }}
# - run: task pytest-external
pytest-external:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
# - "3.12.0-rc.1"
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- uses: arduino/setup-task@v1
with:
repo-token: ${{ github.token }}
- run: task pytest-external

markdownlint-cli:
runs-on: ubuntu-latest
Expand Down
4 changes: 0 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ hamming('text', 'testit')

Supported libraries:

1. [abydos](https://github.com/chrislit/abydos)
1. [Distance](https://github.com/doukremt/distance)
1. [jellyfish](https://github.com/jamesturk/jellyfish)
1. [py_stringmatching](https://github.com/anhaidgroup/py_stringmatching)
Expand All @@ -245,13 +244,11 @@ Without extras installation:
| DamerauLevenshtein | rapidfuzz | 0.00312 |
| DamerauLevenshtein | jellyfish | 0.00591 |
| DamerauLevenshtein | pyxdameraulevenshtein | 0.03335 |
| DamerauLevenshtein | abydos | 0.63278 |
| DamerauLevenshtein | **textdistance** | 0.83524 |
| Hamming | Levenshtein | 0.00038 |
| Hamming | rapidfuzz | 0.00044 |
| Hamming | jellyfish | 0.00091 |
| Hamming | distance | 0.00812 |
| Hamming | abydos | 0.00902 |
| Hamming | **textdistance** | 0.03531 |
| Jaro | rapidfuzz | 0.00092 |
| Jaro | jellyfish | 0.00191 |
Expand All @@ -265,7 +262,6 @@ Without extras installation:
| Levenshtein | pylev | 0.15688 |
| Levenshtein | distance | 0.28669 |
| Levenshtein | **textdistance** | 0.53902 |
| Levenshtein | abydos | 1.25783 |

Total: 24 libs.

Expand Down
10 changes: 0 additions & 10 deletions constraints.txt

This file was deleted.

3 changes: 0 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
extras = {
# enough for simple usage
'extras': [
'abydos',
'jellyfish', # for DamerauLevenshtein
'numpy', # for SmithWaterman and other
'python-Levenshtein', # for Jaro and Levenshtein
Expand All @@ -18,7 +17,6 @@
# needed for benchmarking, optimization and testing
'benchmark': [
# common
'abydos',
'jellyfish',
'numpy',
'python-Levenshtein',
Expand Down Expand Up @@ -68,7 +66,6 @@
'rapidfuzz>=2.6.0', # only same length, any iterators of hashable elements
'jellyfish', # only strings, any length
'distance', # only same length, any iterators
'abydos', # any iterators
],
'Jaro': [
'rapidfuzz>=2.6.0', # any iterators of hashable elements
Expand Down
28 changes: 17 additions & 11 deletions tests/test_external.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from __future__ import annotations

# built-in
import string
from math import isclose

# external
import hypothesis
import hypothesis.strategies
import pytest

# project
Expand All @@ -12,15 +16,7 @@

libraries = prototype.clone()

# numpy throws a bunch of warnings about abydos using `np.int` instead of `int`.
ABYDOS_WARNINGS = (
'ignore:`np.int` is a deprecated alias',
'ignore:`np.float` is a deprecated alias',
'ignore:Using or importing the ABCs',
)


@pytest.mark.filterwarnings(*ABYDOS_WARNINGS)
@pytest.mark.external
@pytest.mark.parametrize('alg', libraries.get_algorithms())
@hypothesis.settings(deadline=None)
Expand All @@ -30,6 +26,12 @@
)
def test_compare(left, right, alg):
for lib in libraries.get_libs(alg):

if lib.module_name == 'jellyfish':
ascii = set(string.printable)
if (set(left) | set(right)) - ascii:
continue

conditions = lib.conditions or {}
internal_func = getattr(textdistance, alg)(external=False, **conditions)
external_func = lib.get_function()
Expand All @@ -45,16 +47,21 @@ def test_compare(left, right, alg):
assert isclose(int_result, ext_result), str(lib)


@pytest.mark.filterwarnings(*ABYDOS_WARNINGS)
@pytest.mark.external
@pytest.mark.parametrize('alg', libraries.get_algorithms())
@hypothesis.given(
left=hypothesis.strategies.text(min_size=1),
right=hypothesis.strategies.text(min_size=1),
)
@pytest.mark.parametrize('qval', (None, 1, 2, 3))
def test_qval(left, right, alg, qval):
def test_qval(left: str, right: str, alg: str, qval: int | None) -> None:
for lib in libraries.get_libs(alg):

if lib.module_name == 'jellyfish':
ascii = set(string.printable)
if (set(left) | set(right)) - ascii:
continue

conditions = lib.conditions or {}
internal_func = getattr(textdistance, alg)(external=False, **conditions)
external_func = lib.get_function()
Expand All @@ -80,7 +87,6 @@ def test_qval(left, right, alg, qval):
assert isclose(int_result, ext_result), f'{lib}({repr(s1)}, {repr(s2)})'


@pytest.mark.filterwarnings(*ABYDOS_WARNINGS)
@pytest.mark.external
@pytest.mark.parametrize('alg', libraries.get_algorithms())
@hypothesis.given(
Expand Down
20 changes: 7 additions & 13 deletions textdistance/libraries.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ def optimize(self) -> None:
# sort libs by speed
self.libs[alg].sort(key=lambda lib: libs_names.index([lib.module_name, lib.func_name]))

def get_algorithms(self) -> list:
def get_algorithms(self) -> list[str]:
"""Get list of available algorithms.
"""
return list(self.libs.keys())

def get_libs(self, alg) -> list[LibraryBase]:
def get_libs(self, alg: str) -> list[LibraryBase]:
"""Get libs list for algorithm
"""
if alg not in self.libs:
Expand All @@ -69,7 +69,7 @@ def __init__(
*,
presets: dict[str, Any] | None = None,
attr: str | None = None,
conditions: dict[str, Any] | None = None,
conditions: dict[str, bool] | None = None,
) -> None:
self.module_name = module_name
self.func_name = func_name
Expand All @@ -89,7 +89,7 @@ def check_conditions(self, obj: object, *sequences: Sequence) -> bool:

return True

def prepare(self, *sequences) -> tuple:
def prepare(self, *sequences: Sequence) -> tuple:
return sequences

@property
Expand Down Expand Up @@ -128,7 +128,7 @@ def __str__(self) -> str:


class TextLibrary(LibraryBase):
def check_conditions(self, obj, *sequences: Sequence) -> bool:
def check_conditions(self, obj: object, *sequences: Sequence) -> bool:
if not super().check_conditions(obj, *sequences):
return False

Expand All @@ -142,15 +142,15 @@ def check_conditions(self, obj, *sequences: Sequence) -> bool:
return False
return True

def prepare(self, *sequences) -> tuple:
def prepare(self, *sequences: Sequence) -> tuple:
# convert list of letters to string
if isinstance(sequences[0], (tuple, list)):
sequences = tuple(map(lambda x: ''.join(x), sequences))
return sequences


class SameLengthLibrary(LibraryBase):
def check_conditions(self, obj, *sequences: Sequence) -> bool:
def check_conditions(self, obj: object, *sequences: Sequence) -> bool:
if not super().check_conditions(obj, *sequences):
return False
# compare only same length iterators
Expand All @@ -167,17 +167,12 @@ class SameLengthTextLibrary(SameLengthLibrary, TextLibrary):
reg = prototype.register

alg = 'DamerauLevenshtein'
reg(alg, LibraryBase(
'abydos.distance', 'DamerauLevenshtein', presets={}, attr='dist_abs',
conditions=dict(restricted=False),
))
reg(alg, LibraryBase('pyxdameraulevenshtein', 'damerau_levenshtein_distance', conditions=dict(restricted=True)))
reg(alg, TextLibrary('jellyfish', 'damerau_levenshtein_distance', conditions=dict(restricted=False)))
reg(alg, LibraryBase('rapidfuzz.distance.DamerauLevenshtein', 'distance', conditions=dict(restricted=False)))
reg(alg, LibraryBase('rapidfuzz.distance.OSA', 'distance', conditions=dict(restricted=True)))

alg = 'Hamming'
reg(alg, LibraryBase('abydos.distance', 'Hamming', presets={}, attr='dist_abs'))
reg(alg, SameLengthLibrary('distance', 'hamming'))
reg(alg, SameLengthTextLibrary('Levenshtein', 'hamming'))
reg(alg, TextLibrary('jellyfish', 'hamming_distance'))
Expand All @@ -197,7 +192,6 @@ class SameLengthTextLibrary(SameLengthLibrary, TextLibrary):
# reg(alg, TextLibrary('Levenshtein', 'jaro_winkler', conditions=dict(winklerize=True)))

alg = 'Levenshtein'
reg(alg, LibraryBase('abydos.distance', 'Levenshtein', presets={}, attr='dist_abs'))
reg(alg, LibraryBase('distance', 'levenshtein'))
reg(alg, LibraryBase('pylev', 'levenshtein'))
reg(alg, TextLibrary('jellyfish', 'levenshtein_distance'))
Expand Down

0 comments on commit 518197a

Please sign in to comment.